Diffstat (limited to 'compiler')
152 files changed, 21036 insertions, 4785 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index a879bd8f06..6472613cfe 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -28,6 +28,81 @@ package { default_applicable_licenses: ["art_license"], } +// Common dependencies for libart-compiler_deps and libartd-compiler_deps. +cc_defaults { + name: "libart-compiler_common_deps", + shared_libs: [ + "libbase", + "liblzma", // libelffile(d) dependency; must be repeated here since it's a static lib. + "libartpalette", + ], + header_libs: [ + "libart_generated_headers", + ], +} + +// Dependencies of libart-compiler, used to propagate libart-compiler deps when static linking. +art_cc_defaults { + name: "libart-compiler_deps", + defaults: ["libart-compiler_common_deps"], + shared_libs: [ + "libartbase", + "libprofile", + "libdexfile", + ], + static_libs: ["libelffile"], + codegen: { + arm: { + // VIXL assembly support for ARM targets. + static_libs: [ + "libvixl", + ], + }, + arm64: { + // VIXL assembly support for ARM64 targets. + static_libs: [ + "libvixl", + ], + }, + }, + runtime_libs: [ + // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load + // `libart-disassembler.so`. + "libart-disassembler", + ], +} + +// Dependencies of libartd-compiler, used to propagate libartd-compiler deps when static linking. +art_cc_defaults { + name: "libartd-compiler_deps", + defaults: ["libart-compiler_common_deps"], + shared_libs: [ + "libartbased", + "libprofiled", + "libdexfiled", + ], + static_libs: ["libelffiled"], + codegen: { + arm: { + // VIXL assembly support for ARM targets. + static_libs: [ + "libvixld", + ], + }, + arm64: { + // VIXL assembly support for ARM64 targets. + static_libs: [ + "libvixld", + ], + }, + }, + runtime_libs: [ + // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load + // `libartd-disassembler.so`. + "libartd-disassembler", + ], +} + art_cc_defaults { name: "libart-compiler-defaults", defaults: ["art_defaults"], @@ -46,6 +121,7 @@ art_cc_defaults { "optimizing/bounds_check_elimination.cc", "optimizing/builder.cc", "optimizing/cha_guard_optimization.cc", + "optimizing/code_generation_data.cc", "optimizing/code_generator.cc", "optimizing/code_generator_utils.cc", "optimizing/code_sinking.cc", @@ -80,7 +156,6 @@ art_cc_defaults { "optimizing/reference_type_propagation.cc", "optimizing/register_allocation_resolver.cc", "optimizing/register_allocator.cc", - "optimizing/register_allocator_graph_color.cc", "optimizing/register_allocator_linear_scan.cc", "optimizing/select_generator.cc", "optimizing/scheduler.cc", @@ -122,6 +197,7 @@ art_cc_defaults { "optimizing/code_generator_arm64.cc", "optimizing/code_generator_vector_arm64_neon.cc", "optimizing/code_generator_vector_arm64_sve.cc", + "optimizing/jit_patches_arm64.cc", "optimizing/scheduler_arm64.cc", "optimizing/instruction_simplifier_arm64.cc", "optimizing/intrinsics_arm64.cc", @@ -132,6 +208,11 @@ art_cc_defaults { }, riscv64: { srcs: [ + "jni/quick/riscv64/calling_convention_riscv64.cc", + "optimizing/code_generator_riscv64.cc", + "optimizing/intrinsics_riscv64.cc", + "utils/riscv64/assembler_riscv64.cc", + "utils/riscv64/jni_macro_assembler_riscv64.cc", "utils/riscv64/managed_register_riscv64.cc", ], }, @@ -210,35 +291,13 @@ art_cc_library { "libart-compiler-defaults", "dex2oat-pgo-defaults", "art_hugepage_defaults", + "libart-compiler_deps", ], - codegen: { - arm: { - // VIXL assembly support for ARM targets. 
- static_libs: [ - "libvixl", - ], - }, - arm64: { - // VIXL assembly support for ARM64 targets. - static_libs: [ - "libvixl", - ], - }, - }, shared_libs: [ + // libart is not included in libart-compiler_deps to allow libart-compiler(-for-test) + // select suitable libart library (either with or without LTO). "libart", - "libartbase", - "libartpalette", - "libprofile", - "libdexfile", ], - static_libs: ["libelffile"], - runtime_libs: [ - // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load - // `libart-disassembler.so`. - "libart-disassembler", - ], - target: { android: { lto: { @@ -253,6 +312,22 @@ art_cc_library { ], } +// For static linking with gtests. Same as `libart-compiler`, but without LTO. +// When gtests static link a library with LTO enabled, they are also built with LTO. +// This makes the build process use a lot of memory. b/277207452 +art_cc_library_static { + name: "libart-compiler-for-test", + defaults: [ + "libart-compiler-defaults", + "dex2oat-pgo-defaults", + "art_hugepage_defaults", + "libart-compiler_deps", + ], + header_libs: [ + "libart_headers", + ], +} + cc_defaults { name: "libart-compiler_static_defaults", defaults: [ @@ -266,48 +341,32 @@ cc_defaults { whole_static_libs: ["libart-compiler"], } +// libart-compiler_static_defaults for standalone gtests. +// Uses libart-for-test_static_defaults instead of libart_static_defaults. +// Uses libart-compiler-for-test instead of libart-compiler. +cc_defaults { + name: "libart-compiler-for-test_static_defaults", + defaults: [ + "libart-compiler_static_base_defaults", + "libart-disassembler_static_defaults", + "libart-for-test_static_defaults", + "libartbase_static_defaults", + "libdexfile_static_defaults", + "libprofile_static_defaults", + ], + whole_static_libs: ["libart-compiler-for-test"], +} + art_cc_library { name: "libartd-compiler", defaults: [ "art_debug_defaults", "libart-compiler-defaults", + "libartd-compiler_deps", ], - codegen: { - arm: { - // VIXL assembly support for ARM targets. - static_libs: [ - "libvixld", - ], - // Export vixl headers as they are included in this library's headers used by tests. - export_static_lib_headers: [ - "libvixld", - ], - }, - arm64: { - // VIXL assembly support for ARM64 targets. - static_libs: [ - "libvixld", - ], - // Export vixl headers as they are included in this library's headers used by tests. - export_static_lib_headers: [ - "libvixld", - ], - }, - }, shared_libs: [ - "libartbased", "libartd", - "libartpalette", - "libprofiled", - "libdexfiled", ], - static_libs: ["libelffiled"], - runtime_libs: [ - // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load - // `libartd-disassembler.so`. - "libartd-disassembler", - ], - apex_available: [ "com.android.art.debug", // TODO(b/183882457): This lib doesn't go into com.android.art, but @@ -330,42 +389,47 @@ cc_defaults { whole_static_libs: ["libartd-compiler"], } +// libartd-compiler_static_defaults for gtests. +// Uses libartd-for-test_static_defaults instead of libart_static_defaults. +// Note that `libartd-compiler-for-test` is not required here, because `libartd-compiler` +// doesn't use LTO. 
+cc_defaults { + name: "libartd-compiler-for-test_static_defaults", + defaults: [ + "libart-compiler_static_base_defaults", + "libartbased_static_defaults", + "libartd-disassembler_static_defaults", + "libartd-for-test_static_defaults", + "libdexfiled_static_defaults", + "libprofiled_static_defaults", + ], + whole_static_libs: ["libartd-compiler"], +} + // Properties common to `libart-compiler-gtest` and `libartd-compiler-gtest`. art_cc_defaults { name: "libart-compiler-gtest-common", srcs: [ "common_compiler_test.cc", ], - shared_libs: [ - "libbase", - ], } -art_cc_library { +art_cc_library_static { name: "libart-compiler-gtest", defaults: [ "libart-gtest-defaults", "libart-compiler-gtest-common", - ], - shared_libs: [ - "libart-compiler", - "libart-disassembler", - "libartbase-art-gtest", - "libart-runtime-gtest", + "libart-compiler-for-test_static_defaults", ], } -art_cc_library { +art_cc_library_static { name: "libartd-compiler-gtest", defaults: [ - "libartd-gtest-defaults", + "art_debug_defaults", + "libart-gtest-defaults", "libart-compiler-gtest-common", - ], - shared_libs: [ - "libartd-compiler", - "libartd-disassembler", - "libartbased-art-gtest", - "libartd-runtime-gtest", + "libartd-compiler-for-test_static_defaults", ], } @@ -474,11 +538,6 @@ art_cc_defaults { "libnativehelper_header_only", ], - shared_libs: [ - "libnativeloader", - "libunwindstack", - ], - target: { host: { shared_libs: [ @@ -496,15 +555,8 @@ art_cc_test { "art_gtest_defaults", "art_compiler_tests_defaults", ], - shared_libs: [ - "libprofiled", - "libartd-simulator-container", - "liblzma", - ], static_libs: [ - "libartd-compiler", - "libelffiled", - "libvixld", + "libartd-simulator-container", ], } @@ -516,21 +568,8 @@ art_cc_test { "art_compiler_tests_defaults", ], data: [":generate-boot-image"], - shared_libs: [ - "libprofile", - "liblzma", - "libartpalette", - ], static_libs: [ - // For now, link `libart-simulator-container` statically for simplicity, - // to save the added complexity to package it in test suites (along with - // other test artifacts) and install it on device during tests. - // TODO(b/192070541): Consider linking `libart-simulator-container` - // dynamically. 
"libart-simulator-container", - "libart-compiler", - "libelffile", - "libvixl", ], test_config: "art_standalone_compiler_tests.xml", } @@ -551,6 +590,12 @@ art_cc_test { "utils/assembler_thumb_test.cc", ], }, + riscv64: { + srcs: [ + "utils/riscv64/assembler_riscv64_test.cc", + "utils/riscv64/jni_macro_assembler_riscv64_test.cc", + ], + }, x86: { srcs: [ "utils/x86/assembler_x86_test.cc", @@ -562,12 +607,7 @@ art_cc_test { ], }, }, - shared_libs: [ - "liblzma", - ], static_libs: [ - "libartd-compiler", - "libelffiled", "libvixld", ], } diff --git a/compiler/art_standalone_compiler_tests.xml b/compiler/art_standalone_compiler_tests.xml index 394ac8d4fb..8e8636cca4 100644 --- a/compiler/art_standalone_compiler_tests.xml +++ b/compiler/art_standalone_compiler_tests.xml @@ -15,6 +15,7 @@ --> <configuration description="Runs art_standalone_compiler_tests."> <option name="config-descriptor:metadata" key="mainline-param" value="com.google.android.art.apex" /> + <option name="config-descriptor:metadata" key="mainline-param" value="com.android.art.apex" /> <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher"> <option name="cleanup" value="true" /> diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index e65bee8e2e..6835e92dfd 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -131,7 +131,7 @@ class CFITest : public dwarf::DwarfTest { } // Use the .cfi_ prefix. new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); - output->push_back(address + ": " + new_line); + output->push_back(ART_FORMAT("{}: {}", address, new_line)); } } } diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 442b96e5fa..a37f516759 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -133,9 +133,9 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set, ArrayRef<const uint8_t> code, ArrayRef<const uint8_t> stack_map, - ArrayRef<const uint8_t> cfi ATTRIBUTE_UNUSED, + [[maybe_unused]] ArrayRef<const uint8_t> cfi, ArrayRef<const linker::LinkerPatch> patches, - bool is_intrinsic ATTRIBUTE_UNUSED) override { + [[maybe_unused]] bool is_intrinsic) override { // Supports only one method at a time. 
CHECK_EQ(instruction_set_, InstructionSet::kNone); CHECK_NE(instruction_set, InstructionSet::kNone); @@ -150,15 +150,15 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo return reinterpret_cast<CompiledMethod*>(this); } - ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) override { + ArrayRef<const uint8_t> GetThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] /*out*/ std::string* debug_name) override { LOG(FATAL) << "Unsupported."; UNREACHABLE(); } - void SetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - ArrayRef<const uint8_t> code ATTRIBUTE_UNUSED, - const std::string& debug_name ATTRIBUTE_UNUSED) override { + void SetThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] ArrayRef<const uint8_t> code, + [[maybe_unused]] const std::string& debug_name) override { LOG(FATAL) << "Unsupported."; UNREACHABLE(); } diff --git a/compiler/compiler.h b/compiler/compiler.h index ce785bb769..6c317f7e02 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -73,12 +73,12 @@ class Compiler { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const = 0; - virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED, - jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED, - jit::JitMemoryRegion* region ATTRIBUTE_UNUSED, - ArtMethod* method ATTRIBUTE_UNUSED, - CompilationKind compilation_kind ATTRIBUTE_UNUSED, - jit::JitLogger* jit_logger ATTRIBUTE_UNUSED) + virtual bool JitCompile([[maybe_unused]] Thread* self, + [[maybe_unused]] jit::JitCodeCache* code_cache, + [[maybe_unused]] jit::JitMemoryRegion* region, + [[maybe_unused]] ArtMethod* method, + [[maybe_unused]] CompilationKind compilation_kind, + [[maybe_unused]] jit::JitLogger* jit_logger) REQUIRES_SHARED(Locks::mutator_lock_) { return false; } diff --git a/compiler/compiler_reflection_test.cc b/compiler/compiler_reflection_test.cc index f3c07db136..d8e2b9e5b9 100644 --- a/compiler/compiler_reflection_test.cc +++ b/compiler/compiler_reflection_test.cc @@ -29,6 +29,7 @@ namespace art HIDDEN { class CompilerReflectionTest : public CommonCompilerTest {}; TEST_F(CompilerReflectionTest, StaticMainMethod) { + TEST_DISABLED_FOR_RISCV64(); ScopedObjectAccess soa(Thread::Current()); jobject jclass_loader = LoadDex("Main"); StackHandleScope<1> hs(soa.Self()); diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h index 6b72262e26..fe98a578b1 100644 --- a/compiler/debug/elf_debug_frame_writer.h +++ b/compiler/debug/elf_debug_frame_writer.h @@ -90,7 +90,26 @@ static void WriteCIE(InstructionSet isa, /*inout*/ std::vector<uint8_t>* buffer) return; } case InstructionSet::kRiscv64: { - UNIMPLEMENTED(FATAL); + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::Riscv64Core(2), 0); // X2(SP). + // core registers. + for (int reg = 3; reg < 32; reg++) { // Skip X0 (Zero), X1 (RA) and X2 (SP). + if ((reg >= 5 && reg < 8) || (reg >= 10 && reg < 18) || reg >= 28) { + opcodes.Undefined(Reg::Riscv64Core(reg)); + } else { + opcodes.SameValue(Reg::Riscv64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || (reg >=10 && reg < 18) || reg >= 28) { + opcodes.Undefined(Reg::Riscv64Fp(reg)); + } else { + opcodes.SameValue(Reg::Riscv64Fp(reg)); + } + } + auto return_reg = Reg::Riscv64Core(1); // X1(RA). 
+ WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kX86: { diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index 4896bc1e9b..5d654e3e06 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -194,7 +194,7 @@ class ElfDebugLineWriter { } else { directory_index = it->second; } - full_path = package_name + "/" + file_name; + full_path = ART_FORMAT("{}/{}", package_name, file_name); } // Add file entry. diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index 8f64d73aa7..505b6c5d8a 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -113,7 +113,7 @@ void WriteDebugInfo(ElfBuilder<ElfTypes>* builder, template <typename ElfTypes> static std::vector<uint8_t> MakeMiniDebugInfoInternal( InstructionSet isa, - const InstructionSetFeatures* features ATTRIBUTE_UNUSED, + [[maybe_unused]] const InstructionSetFeatures* features, typename ElfTypes::Addr text_section_address, size_t text_section_size, typename ElfTypes::Addr dex_section_address, @@ -172,11 +172,10 @@ std::vector<uint8_t> MakeMiniDebugInfo( } } -std::vector<uint8_t> MakeElfFileForJIT( - InstructionSet isa, - const InstructionSetFeatures* features ATTRIBUTE_UNUSED, - bool mini_debug_info, - const MethodDebugInfo& method_info) { +std::vector<uint8_t> MakeElfFileForJIT(InstructionSet isa, + [[maybe_unused]] const InstructionSetFeatures* features, + bool mini_debug_info, + const MethodDebugInfo& method_info) { using ElfTypes = ElfRuntimeTypes; CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); CHECK_EQ(method_info.is_code_address_text_relative, false); @@ -213,13 +212,12 @@ std::vector<uint8_t> MakeElfFileForJIT( DCHECK_EQ(sym.st_size, method_info.code_size); num_syms++; }); - reader.VisitDebugFrame([&](const Reader::CIE* cie ATTRIBUTE_UNUSED) { - num_cies++; - }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) { - DCHECK_EQ(fde->sym_addr, method_info.code_address); - DCHECK_EQ(fde->sym_size, method_info.code_size); - num_fdes++; - }); + reader.VisitDebugFrame([&]([[maybe_unused]] const Reader::CIE* cie) { num_cies++; }, + [&](const Reader::FDE* fde, [[maybe_unused]] const Reader::CIE* cie) { + DCHECK_EQ(fde->sym_addr, method_info.code_address); + DCHECK_EQ(fde->sym_size, method_info.code_size); + num_fdes++; + }); DCHECK_EQ(num_syms, 1u); DCHECK_LE(num_cies, 1u); DCHECK_LE(num_fdes, 1u); @@ -302,18 +300,20 @@ std::vector<uint8_t> PackElfFileForJIT( // ART always produces the same CIE, so we copy the first one and ignore the rest. 
bool copied_cie = false; for (Reader& reader : readers) { - reader.VisitDebugFrame([&](const Reader::CIE* cie) { - if (!copied_cie) { - debug_frame->WriteFully(cie->data(), cie->size()); - copied_cie = true; - } - }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) { - DCHECK(copied_cie); - DCHECK_EQ(fde->cie_pointer, 0); - if (!is_removed_symbol(fde->sym_addr)) { - debug_frame->WriteFully(fde->data(), fde->size()); - } - }); + reader.VisitDebugFrame( + [&](const Reader::CIE* cie) { + if (!copied_cie) { + debug_frame->WriteFully(cie->data(), cie->size()); + copied_cie = true; + } + }, + [&](const Reader::FDE* fde, [[maybe_unused]] const Reader::CIE* cie) { + DCHECK(copied_cie); + DCHECK_EQ(fde->cie_pointer, 0); + if (!is_removed_symbol(fde->sym_addr)) { + debug_frame->WriteFully(fde->data(), fde->size()); + } + }); } debug_frame->End(); @@ -348,9 +348,8 @@ std::vector<uint8_t> PackElfFileForJIT( std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, - const InstructionSetFeatures* features ATTRIBUTE_UNUSED, - const ArrayRef<mirror::Class*>& types) - REQUIRES_SHARED(Locks::mutator_lock_) { + [[maybe_unused]] const InstructionSetFeatures* features, + const ArrayRef<mirror::Class*>& types) REQUIRES_SHARED(Locks::mutator_lock_) { using ElfTypes = ElfRuntimeTypes; CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); std::vector<uint8_t> buffer; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 603596f3bc..d0770e952b 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -57,7 +57,7 @@ CompilerOptions::CompilerOptions() generate_debug_info_(kDefaultGenerateDebugInfo), generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo), generate_build_id_(false), - implicit_null_checks_(true), + implicit_null_checks_(false), implicit_so_checks_(true), implicit_suspend_checks_(false), compile_pic_(false), @@ -121,7 +121,8 @@ bool CompilerOptions::ParseRegisterAllocationStrategy(const std::string& option, if (option == "linear-scan") { register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan; } else if (option == "graph-color") { - register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor; + LOG(ERROR) << "Graph coloring allocator has been removed, using linear scan instead."; + register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan; } else { *error_msg = "Unrecognized register allocation strategy. 
Try linear-scan, or graph-color."; return false; diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index c8a41ce24b..a5b3ae17d0 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -42,6 +42,7 @@ class VerifierDepsTest; namespace linker { class Arm64RelativePatcherTest; +class Thumb2RelativePatcherTest; } // namespace linker class ArtMethod; @@ -115,9 +116,7 @@ class CompilerOptions final { } bool IsAnyCompilationEnabled() const { - return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_) && - // TODO(riscv64): remove this when we have compiler support for RISC-V - GetInstructionSet() != InstructionSet::kRiscv64; + return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_); } size_t GetHugeMethodThreshold() const { @@ -504,6 +503,7 @@ class CompilerOptions final { friend class jit::JitCompiler; friend class verifier::VerifierDepsTest; friend class linker::Arm64RelativePatcherTest; + friend class linker::Thumb2RelativePatcherTest; template <class Base> friend bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string* error_msg); diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h index 79a59625f5..8530df37e4 100644 --- a/compiler/driver/compiler_options_map-inl.h +++ b/compiler/driver/compiler_options_map-inl.h @@ -117,7 +117,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string #pragma GCC diagnostic ignored "-Wframe-larger-than=" template <typename Map, typename Builder> -inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { +NO_INLINE void AddCompilerOptionsArgumentParserOptions(Builder& b) { // clang-format off b. Define("--compiler-filter=_") diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index 82c4998217..75ade55799 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -69,9 +69,10 @@ class ExceptionTest : public CommonRuntimeTest { dex_ = my_klass_->GetDexCache()->GetDexFile(); + std::vector<uint8_t> fake_code; uint32_t code_size = 12; for (size_t i = 0 ; i < code_size; i++) { - fake_code_.push_back(0x70 | i); + fake_code.push_back(0x70 | i); } const uint32_t native_pc_offset = 4u; @@ -96,16 +97,23 @@ class ExceptionTest : public CommonRuntimeTest { const size_t header_size = sizeof(OatQuickMethodHeader); const size_t code_alignment = GetInstructionSetCodeAlignment(kRuntimeISA); - fake_header_code_and_maps_.resize(stack_maps_size + header_size + code_size + code_alignment); - // NB: The start of the vector might not have been allocated the desired alignment. + fake_header_code_and_maps_size_ = stack_maps_size + header_size + code_size + code_alignment; + // Use mmap to make sure we get untagged memory here. Real code gets allocated using + // mspace_memalign which is never tagged. 
+ fake_header_code_and_maps_ = static_cast<uint8_t*>(mmap(nullptr, + fake_header_code_and_maps_size_, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0)); uint8_t* code_ptr = AlignUp(&fake_header_code_and_maps_[stack_maps_size + header_size], code_alignment); memcpy(&fake_header_code_and_maps_[0], stack_map.data(), stack_maps_size); - OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_.data()); + OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_); static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); memcpy(code_ptr - header_size, &method_header, header_size); - memcpy(code_ptr, fake_code_.data(), fake_code_.size()); + memcpy(code_ptr, fake_code.data(), fake_code.size()); if (kRuntimeISA == InstructionSet::kArm) { // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer(). @@ -123,10 +131,12 @@ class ExceptionTest : public CommonRuntimeTest { method_g_->SetEntryPointFromQuickCompiledCode(code_ptr); } + void TearDown() override { munmap(fake_header_code_and_maps_, fake_header_code_and_maps_size_); } + const DexFile* dex_; - std::vector<uint8_t> fake_code_; - std::vector<uint8_t> fake_header_code_and_maps_; + size_t fake_header_code_and_maps_size_; + uint8_t* fake_header_code_and_maps_; ArtMethod* method_f_; ArtMethod* method_g_; diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h index 9d1f3073fa..79f47f817f 100644 --- a/compiler/jit/jit_logger.h +++ b/compiler/jit/jit_logger.h @@ -53,7 +53,7 @@ namespace jit { // // Command line Example: // $ perf record -k mono dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test -// $ perf inject -i perf.data -o perf.data.jitted +// $ perf inject -j -i perf.data -o perf.data.jitted // $ perf report -i perf.data.jitted // $ perf annotate -i perf.data.jitted // NOTE: diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 70cf2d4eb0..ae5f2d0aa9 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -99,7 +99,7 @@ class JNICFITest : public CFITest { jni_asm->FinalizeCode(); std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); MemoryRegion code(&actual_asm[0], actual_asm.size()); - jni_asm->FinalizeInstructions(code); + jni_asm->CopyInstructions(code); ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size); const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 397db251b8..40989b2999 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -175,9 +175,8 @@ size_t count_nonnull_refs_single_helper(T arg, // SFINAE for non-ref-types. Always 0. 
template <typename T> -size_t count_nonnull_refs_single_helper(T arg ATTRIBUTE_UNUSED, - typename std::enable_if<!jni_type_traits<T>::is_ref>::type* - = nullptr) { +size_t count_nonnull_refs_single_helper( + [[maybe_unused]] T arg, typename std::enable_if<!jni_type_traits<T>::is_ref>::type* = nullptr) { return 0; } @@ -591,10 +590,9 @@ struct ScopedCheckHandleScope { class CountReferencesVisitor : public RootVisitor { public: - void VisitRoots(mirror::Object*** roots ATTRIBUTE_UNUSED, + void VisitRoots([[maybe_unused]] mirror::Object*** roots, size_t count, - const RootInfo& info) override - REQUIRES_SHARED(Locks::mutator_lock_) { + const RootInfo& info) override REQUIRES_SHARED(Locks::mutator_lock_) { if (info.GetType() == art::RootType::kRootJavaFrame) { const JavaFrameRootInfo& jrfi = static_cast<const JavaFrameRootInfo&>(info); if (jrfi.GetVReg() == JavaFrameRootInfo::kNativeReferenceArgument) { @@ -604,10 +602,9 @@ class CountReferencesVisitor : public RootVisitor { } } - void VisitRoots(mirror::CompressedReference<mirror::Object>** roots ATTRIBUTE_UNUSED, - size_t count ATTRIBUTE_UNUSED, - const RootInfo& info) override - REQUIRES_SHARED(Locks::mutator_lock_) { + void VisitRoots([[maybe_unused]] mirror::CompressedReference<mirror::Object>** roots, + [[maybe_unused]] size_t count, + const RootInfo& info) override REQUIRES_SHARED(Locks::mutator_lock_) { CHECK_NE(info.GetType(), art::RootType::kRootJavaFrame); } @@ -980,8 +977,8 @@ void JniCompilerTest::CompileAndRunIntObjectObjectMethodImpl() { JNI_TEST(CompileAndRunIntObjectObjectMethod) int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {}; -jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED, - jclass klass ATTRIBUTE_UNUSED, +jint Java_MyClassNatives_fooSII([[maybe_unused]] JNIEnv* env, + [[maybe_unused]] jclass klass, jint x, jint y) { gJava_MyClassNatives_fooSII_calls[gCurrentJni]++; @@ -1003,8 +1000,8 @@ void JniCompilerTest::CompileAndRunStaticIntIntMethodImpl() { JNI_TEST_CRITICAL(CompileAndRunStaticIntIntMethod) int gJava_MyClassNatives_fooSDD_calls[kJniKindCount] = {}; -jdouble Java_MyClassNatives_fooSDD(JNIEnv* env ATTRIBUTE_UNUSED, - jclass klass ATTRIBUTE_UNUSED, +jdouble Java_MyClassNatives_fooSDD([[maybe_unused]] JNIEnv* env, + [[maybe_unused]] jclass klass, jdouble x, jdouble y) { gJava_MyClassNatives_fooSDD_calls[gCurrentJni]++; @@ -1676,8 +1673,8 @@ void JniCompilerTest::CompileAndRunFloatFloatMethodImpl() { JNI_TEST(CompileAndRunFloatFloatMethod) -void Java_MyClassNatives_checkParameterAlign(JNIEnv* env ATTRIBUTE_UNUSED, - jobject thisObj ATTRIBUTE_UNUSED, +void Java_MyClassNatives_checkParameterAlign([[maybe_unused]] JNIEnv* env, + [[maybe_unused]] jobject thisObj, jint i1, jlong l1) { EXPECT_EQ(i1, 1234); diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index e716502911..cd6aac517d 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -323,7 +323,7 @@ ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() static_assert(kCalleeSaveRegisters[lr_index].Equals( Arm64ManagedRegister::FromXRegister(LR))); return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray( - /*pos*/ lr_index, /*length=*/ 1u); + /*pos=*/ lr_index, /*length=*/ 1u); } } else { return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters); diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc index 
2b9da6ba1a..459beb0c67 100644 --- a/compiler/jni/quick/calling_convention.cc +++ b/compiler/jni/quick/calling_convention.cc @@ -29,6 +29,10 @@ #include "jni/quick/arm64/calling_convention_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "jni/quick/riscv64/calling_convention_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "jni/quick/x86/calling_convention_x86.h" #endif @@ -61,6 +65,12 @@ std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention new (allocator) arm64::Arm64ManagedRuntimeCallingConvention( is_static, is_synchronized, shorty)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return std::unique_ptr<ManagedRuntimeCallingConvention>( + new (allocator) riscv64::Riscv64ManagedRuntimeCallingConvention( + is_static, is_synchronized, shorty)); +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: return std::unique_ptr<ManagedRuntimeCallingConvention>( @@ -114,7 +124,7 @@ bool ManagedRuntimeCallingConvention::IsCurrentArgPossiblyNull() { } size_t ManagedRuntimeCallingConvention::CurrentParamSize() { - return ParamSize(itr_args_); + return ParamSize(itr_args_, /*reference_size=*/ sizeof(mirror::HeapReference<mirror::Object>)); } bool ManagedRuntimeCallingConvention::IsCurrentParamAReference() { @@ -156,6 +166,12 @@ std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocato new (allocator) arm64::Arm64JniCallingConvention( is_static, is_synchronized, is_fast_native, is_critical_native, shorty)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return std::unique_ptr<JniCallingConvention>( + new (allocator) riscv64::Riscv64JniCallingConvention( + is_static, is_synchronized, is_fast_native, is_critical_native, shorty)); +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: return std::unique_ptr<JniCallingConvention>( @@ -188,7 +204,7 @@ bool JniCallingConvention::HasNext() { if (IsCurrentArgExtraForJni()) { return true; } else { - unsigned int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return arg_pos < NumArgs(); } } @@ -220,7 +236,7 @@ bool JniCallingConvention::IsCurrentParamAReference() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamAReference(arg_pos); } } @@ -242,7 +258,7 @@ bool JniCallingConvention::IsCurrentParamAFloatOrDouble() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamAFloatOrDouble(arg_pos); } } @@ -256,7 +272,7 @@ bool JniCallingConvention::IsCurrentParamADouble() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamADouble(arg_pos); } } @@ -270,7 +286,7 @@ bool JniCallingConvention::IsCurrentParamALong() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamALong(arg_pos); } } @@ -279,8 +295,9 @@ size_t JniCallingConvention::CurrentParamSize() const { if (IsCurrentArgExtraForJni()) { return static_cast<size_t>(frame_pointer_size_); // JNIEnv or jobject/jclass } else { - int arg_pos = GetIteratorPositionWithinShorty(); - return ParamSize(arg_pos); + size_t arg_pos = GetIteratorPositionWithinShorty(); + // 
References are converted to `jobject` for the native call. Pass `frame_pointer_size_`. + return ParamSize(arg_pos, /*reference_size=*/ static_cast<size_t>(frame_pointer_size_)); } } @@ -305,7 +322,7 @@ bool JniCallingConvention::HasSelfClass() const { } } -unsigned int JniCallingConvention::GetIteratorPositionWithinShorty() const { +size_t JniCallingConvention::GetIteratorPositionWithinShorty() const { // We need to subtract out the extra JNI arguments if we want to use this iterator position // with the inherited CallingConvention member functions, which rely on scanning the shorty. // Note that our shorty does *not* include the JNIEnv, jclass/jobject parameters. diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index 0187b14256..2657e943e6 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -178,14 +178,18 @@ class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConventi size_t NumReferenceArgs() const { return num_ref_args_; } - size_t ParamSize(unsigned int param) const { + size_t ParamSize(size_t param, size_t reference_size) const { DCHECK_LT(param, NumArgs()); if (IsStatic()) { param++; // 0th argument must skip return value at start of the shorty } else if (param == 0) { - return sizeof(mirror::HeapReference<mirror::Object>); // this argument + return reference_size; // this argument } - size_t result = Primitive::ComponentSize(Primitive::GetType(shorty_[param])); + Primitive::Type type = Primitive::GetType(shorty_[param]); + if (type == Primitive::kPrimNot) { + return reference_size; + } + size_t result = Primitive::ComponentSize(type); if (result >= 1 && result < 4) { result = 4; } @@ -344,7 +348,7 @@ class JniCallingConvention : public CallingConvention { return IsCurrentParamALong() || IsCurrentParamADouble(); } bool IsCurrentParamJniEnv(); - size_t CurrentParamSize() const; + virtual size_t CurrentParamSize() const; virtual bool IsCurrentParamInRegister() = 0; virtual bool IsCurrentParamOnStack() = 0; virtual ManagedRegister CurrentParamRegister() = 0; @@ -432,7 +436,7 @@ class JniCallingConvention : public CallingConvention { bool HasSelfClass() const; // Returns the position of itr_args_, fixed up by removing the offset of extra JNI arguments. - unsigned int GetIteratorPositionWithinShorty() const; + size_t GetIteratorPositionWithinShorty() const; // Is the current argument (at the iterator) an extra argument for JNI? 
bool IsCurrentArgExtraForJni() const; diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index c60d97467e..9349d2c9fd 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -154,11 +154,11 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // -- Don't allow any objects as parameter or return value if (UNLIKELY(is_critical_native)) { CHECK(is_static) - << "@CriticalNative functions cannot be virtual since that would" + << "@CriticalNative functions cannot be virtual since that would " << "require passing a reference parameter (this), which is illegal " << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); CHECK(!is_synchronized) - << "@CriticalNative functions cannot be synchronized since that would" + << "@CriticalNative functions cannot be synchronized since that would " << "require passing a (class and/or this) reference parameter, which is illegal " << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); for (size_t i = 0; i < strlen(shorty); ++i) { @@ -387,8 +387,8 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp DCHECK(main_jni_conv->HasNext()); static_assert(kObjectReferenceSize == 4u); bool is_reference = mr_conv->IsCurrentParamAReference(); - size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u; - size_t dest_size = is_reference ? kRawPointerSize : src_size; + size_t src_size = mr_conv->CurrentParamSize(); + size_t dest_size = main_jni_conv->CurrentParamSize(); src_args.push_back(mr_conv->IsCurrentParamInRegister() ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size) : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size)); @@ -621,7 +621,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp main_jni_conv->CalleeSaveScratchRegisters()[0], kObjectReferenceSize); // Load the declaring class reference. DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); - __ Load(temp, method_register, MemberOffset(0u), kObjectReferenceSize); + __ LoadGcRootWithoutReadBarrier(temp, method_register, MemberOffset(0u)); // Return to main path if the class object is marked. __ TestMarkBit(temp, jclass_read_barrier_return.get(), JNIMacroUnaryCondition::kNotZero); } @@ -724,7 +724,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return JniCompiledMethod(instruction_set, std::move(managed_code), diff --git a/compiler/jni/quick/riscv64/calling_convention_riscv64.cc b/compiler/jni/quick/riscv64/calling_convention_riscv64.cc new file mode 100644 index 0000000000..b083fec14a --- /dev/null +++ b/compiler/jni/quick/riscv64/calling_convention_riscv64.cc @@ -0,0 +1,429 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "calling_convention_riscv64.h" + +#include <android-base/logging.h> + +#include "arch/instruction_set.h" +#include "arch/riscv64/jni_frame_riscv64.h" +#include "utils/riscv64/managed_register_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +static constexpr ManagedRegister kXArgumentRegisters[] = { + Riscv64ManagedRegister::FromXRegister(A0), + Riscv64ManagedRegister::FromXRegister(A1), + Riscv64ManagedRegister::FromXRegister(A2), + Riscv64ManagedRegister::FromXRegister(A3), + Riscv64ManagedRegister::FromXRegister(A4), + Riscv64ManagedRegister::FromXRegister(A5), + Riscv64ManagedRegister::FromXRegister(A6), + Riscv64ManagedRegister::FromXRegister(A7), +}; +static_assert(kMaxIntLikeArgumentRegisters == arraysize(kXArgumentRegisters)); + +static const FRegister kFArgumentRegisters[] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 +}; +static_assert(kMaxFloatOrDoubleArgumentRegisters == arraysize(kFArgumentRegisters)); + +static constexpr ManagedRegister kCalleeSaveRegisters[] = { + // Core registers. + Riscv64ManagedRegister::FromXRegister(S0), + // ART thread register (TR = S1) is not saved on the stack. + Riscv64ManagedRegister::FromXRegister(S2), + Riscv64ManagedRegister::FromXRegister(S3), + Riscv64ManagedRegister::FromXRegister(S4), + Riscv64ManagedRegister::FromXRegister(S5), + Riscv64ManagedRegister::FromXRegister(S6), + Riscv64ManagedRegister::FromXRegister(S7), + Riscv64ManagedRegister::FromXRegister(S8), + Riscv64ManagedRegister::FromXRegister(S9), + Riscv64ManagedRegister::FromXRegister(S10), + Riscv64ManagedRegister::FromXRegister(S11), + Riscv64ManagedRegister::FromXRegister(RA), + + // Hard float registers. + Riscv64ManagedRegister::FromFRegister(FS0), + Riscv64ManagedRegister::FromFRegister(FS1), + Riscv64ManagedRegister::FromFRegister(FS2), + Riscv64ManagedRegister::FromFRegister(FS3), + Riscv64ManagedRegister::FromFRegister(FS4), + Riscv64ManagedRegister::FromFRegister(FS5), + Riscv64ManagedRegister::FromFRegister(FS6), + Riscv64ManagedRegister::FromFRegister(FS7), + Riscv64ManagedRegister::FromFRegister(FS8), + Riscv64ManagedRegister::FromFRegister(FS9), + Riscv64ManagedRegister::FromFRegister(FS10), + Riscv64ManagedRegister::FromFRegister(FS11), +}; + +template <size_t size> +static constexpr uint32_t CalculateCoreCalleeSpillMask( + const ManagedRegister (&callee_saves)[size]) { + uint32_t result = 0u; + for (auto&& r : callee_saves) { + if (r.AsRiscv64().IsXRegister()) { + result |= (1u << r.AsRiscv64().AsXRegister()); + } + } + return result; +} + +template <size_t size> +static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&callee_saves)[size]) { + uint32_t result = 0u; + for (auto&& r : callee_saves) { + if (r.AsRiscv64().IsFRegister()) { + result |= (1u << r.AsRiscv64().AsFRegister()); + } + } + return result; +} + +static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters); +static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters); + +static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = { + // Core registers. 
+ Riscv64ManagedRegister::FromXRegister(S0), + Riscv64ManagedRegister::FromXRegister(S1), + Riscv64ManagedRegister::FromXRegister(S2), + Riscv64ManagedRegister::FromXRegister(S3), + Riscv64ManagedRegister::FromXRegister(S4), + Riscv64ManagedRegister::FromXRegister(S5), + Riscv64ManagedRegister::FromXRegister(S6), + Riscv64ManagedRegister::FromXRegister(S7), + Riscv64ManagedRegister::FromXRegister(S8), + Riscv64ManagedRegister::FromXRegister(S9), + Riscv64ManagedRegister::FromXRegister(S10), + Riscv64ManagedRegister::FromXRegister(S11), + Riscv64ManagedRegister::FromXRegister(RA), + + // Hard float registers. + Riscv64ManagedRegister::FromFRegister(FS0), + Riscv64ManagedRegister::FromFRegister(FS1), + Riscv64ManagedRegister::FromFRegister(FS2), + Riscv64ManagedRegister::FromFRegister(FS3), + Riscv64ManagedRegister::FromFRegister(FS4), + Riscv64ManagedRegister::FromFRegister(FS5), + Riscv64ManagedRegister::FromFRegister(FS6), + Riscv64ManagedRegister::FromFRegister(FS7), + Riscv64ManagedRegister::FromFRegister(FS8), + Riscv64ManagedRegister::FromFRegister(FS9), + Riscv64ManagedRegister::FromFRegister(FS10), + Riscv64ManagedRegister::FromFRegister(FS11), +}; + +static constexpr uint32_t kNativeCoreCalleeSpillMask = + CalculateCoreCalleeSpillMask(kNativeCalleeSaveRegisters); +static constexpr uint32_t kNativeFpCalleeSpillMask = + CalculateFpCalleeSpillMask(kNativeCalleeSaveRegisters); + +static ManagedRegister ReturnRegisterForShorty(const char* shorty) { + if (shorty[0] == 'F' || shorty[0] == 'D') { + return Riscv64ManagedRegister::FromFRegister(FA0); + } else if (shorty[0] == 'V') { + return Riscv64ManagedRegister::NoRegister(); + } else { + // All other return types use A0. Note that there is no managed type wide enough to use A1/FA1. + return Riscv64ManagedRegister::FromXRegister(A0); + } +} + +// Managed runtime calling convention + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::ReturnRegister() const { + return ReturnRegisterForShorty(GetShorty()); +} + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::MethodRegister() { + return Riscv64ManagedRegister::FromXRegister(A0); +} + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() { + DCHECK(!Riscv64ManagedRegister::FromXRegister(A4).Overlaps(ReturnRegister().AsRiscv64())); + return Riscv64ManagedRegister::FromXRegister(A4); +} + +bool Riscv64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() { + // Note: The managed ABI does not pass FP args in general purpose registers. + // This differs from the native ABI which does that after using all FP arg registers. 
+ if (IsCurrentParamAFloatOrDouble()) { + return itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters; + } else { + size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_; + return /* method */ 1u + non_fp_arg_number < kMaxIntLikeArgumentRegisters; + } +} + +bool Riscv64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() { + return !IsCurrentParamInRegister(); +} + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::CurrentParamRegister() { + DCHECK(IsCurrentParamInRegister()); + if (IsCurrentParamAFloatOrDouble()) { + return Riscv64ManagedRegister::FromFRegister(kFArgumentRegisters[itr_float_and_doubles_]); + } else { + size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_; + return kXArgumentRegisters[/* method */ 1u + non_fp_arg_number]; + } +} + +FrameOffset Riscv64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { + return FrameOffset(displacement_.Int32Value() + // displacement + kFramePointerSize + // Method ref + (itr_slots_ * sizeof(uint32_t))); // offset into in args +} + +// JNI calling convention + +Riscv64JniCallingConvention::Riscv64JniCallingConvention(bool is_static, + bool is_synchronized, + bool is_fast_native, + bool is_critical_native, + const char* shorty) + : JniCallingConvention(is_static, + is_synchronized, + is_fast_native, + is_critical_native, + shorty, + kRiscv64PointerSize) { +} + +ManagedRegister Riscv64JniCallingConvention::ReturnRegister() const { + return ReturnRegisterForShorty(GetShorty()); +} + +ManagedRegister Riscv64JniCallingConvention::IntReturnRegister() const { + return Riscv64ManagedRegister::FromXRegister(A0); +} + +size_t Riscv64JniCallingConvention::FrameSize() const { + if (is_critical_native_) { + CHECK(!SpillsMethod()); + CHECK(!HasLocalReferenceSegmentState()); + return 0u; // There is no managed frame for @CriticalNative. + } + + // Method*, callee save area size, local reference segment state + DCHECK(SpillsMethod()); + size_t method_ptr_size = static_cast<size_t>(kFramePointerSize); + size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize; + size_t total_size = method_ptr_size + callee_save_area_size; + + DCHECK(HasLocalReferenceSegmentState()); + // Cookie is saved in one of the spilled registers. + + return RoundUp(total_size, kStackAlignment); +} + +size_t Riscv64JniCallingConvention::OutFrameSize() const { + // Count param args, including JNIEnv* and jclass*. + size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs(); + size_t num_fp_args = NumFloatOrDoubleArgs(); + DCHECK_GE(all_args, num_fp_args); + size_t num_non_fp_args = all_args - num_fp_args; + // The size of outgoing arguments. + size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args); + + // @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64. + static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u); + static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) == 0u); + + // For @CriticalNative, we can make a tail call if there are no stack args. + // Otherwise, add space for return PC. + // Note: Result does not neeed to be zero- or sign-extended. + DCHECK(!RequiresSmallResultTypeExtension()); + if (is_critical_native_ && size != 0u) { + size += kFramePointerSize; // We need to spill RA with the args. 
+ } + size_t out_args_size = RoundUp(size, kNativeStackAlignment); + if (UNLIKELY(IsCriticalNative())) { + DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u)); + } + return out_args_size; +} + +ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::CalleeSaveRegisters() const { + if (UNLIKELY(IsCriticalNative())) { + if (UseTailCall()) { + return ArrayRef<const ManagedRegister>(); // Do not spill anything. + } else { + // Spill RA with out args. + static_assert((kCoreCalleeSpillMask & (1 << RA)) != 0u); // Contains RA. + constexpr size_t ra_index = POPCOUNT(kCoreCalleeSpillMask) - 1u; + static_assert(kCalleeSaveRegisters[ra_index].Equals( + Riscv64ManagedRegister::FromXRegister(RA))); + return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray( + /*pos=*/ ra_index, /*length=*/ 1u); + } + } else { + return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters); + } +} + +ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::CalleeSaveScratchRegisters() const { + DCHECK(!IsCriticalNative()); + // Use S3-S11 from managed callee saves. All these registers are also native callee saves. + constexpr size_t kStart = 2u; + constexpr size_t kLength = 9u; + static_assert(kCalleeSaveRegisters[kStart].Equals(Riscv64ManagedRegister::FromXRegister(S3))); + static_assert(kCalleeSaveRegisters[kStart + kLength - 1u].Equals( + Riscv64ManagedRegister::FromXRegister(S11))); + static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u); + return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(kStart, kLength); +} + +ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::ArgumentScratchRegisters() const { + DCHECK(!IsCriticalNative()); + // Exclude A0 if it's used as a return register. + static_assert(kXArgumentRegisters[0].Equals(Riscv64ManagedRegister::FromXRegister(A0))); + ArrayRef<const ManagedRegister> scratch_regs(kXArgumentRegisters); + Riscv64ManagedRegister return_reg = ReturnRegister().AsRiscv64(); + auto return_reg_overlaps = [return_reg](ManagedRegister reg) { + return return_reg.Overlaps(reg.AsRiscv64()); + }; + if (return_reg_overlaps(scratch_regs[0])) { + scratch_regs = scratch_regs.SubArray(/*pos=*/ 1u); + } + DCHECK(std::none_of(scratch_regs.begin(), scratch_regs.end(), return_reg_overlaps)); + return scratch_regs; +} + +uint32_t Riscv64JniCallingConvention::CoreSpillMask() const { + return is_critical_native_ ? 0u : kCoreCalleeSpillMask; +} + +uint32_t Riscv64JniCallingConvention::FpSpillMask() const { + return is_critical_native_ ? 0u : kFpCalleeSpillMask; +} + +size_t Riscv64JniCallingConvention::CurrentParamSize() const { + if (IsCurrentArgExtraForJni()) { + return static_cast<size_t>(frame_pointer_size_); // JNIEnv or jobject/jclass + } else { + size_t arg_pos = GetIteratorPositionWithinShorty(); + DCHECK_LT(arg_pos, NumArgs()); + if (IsStatic()) { + ++arg_pos; // 0th argument must skip return value at start of the shorty + } else if (arg_pos == 0) { + return static_cast<size_t>(kRiscv64PointerSize); // this argument + } + // The riscv64 native calling convention specifies that integers narrower than XLEN (64) + // bits are "widened according to the sign of their type up to 32 bits, then sign-extended + // to XLEN bits." Thus, everything other than `float` (which has the high 32 bits undefined) + // is passed as 64 bits, whether in register, or on the stack. + return (GetShorty()[arg_pos] == 'F') ? 
4u : static_cast<size_t>(kRiscv64PointerSize); + } +} + +bool Riscv64JniCallingConvention::IsCurrentParamInRegister() { + // FP args use FPRs, then GPRs and only then the stack. + if (itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters) { + if (IsCurrentParamAFloatOrDouble()) { + return true; + } else { + size_t num_non_fp_args = itr_args_ - itr_float_and_doubles_; + return num_non_fp_args < kMaxIntLikeArgumentRegisters; + } + } else { + return (itr_args_ < kMaxFloatOrDoubleArgumentRegisters + kMaxIntLikeArgumentRegisters); + } +} + +bool Riscv64JniCallingConvention::IsCurrentParamOnStack() { + return !IsCurrentParamInRegister(); +} + +ManagedRegister Riscv64JniCallingConvention::CurrentParamRegister() { + // FP args use FPRs, then GPRs and only then the stack. + CHECK(IsCurrentParamInRegister()); + if (itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters) { + if (IsCurrentParamAFloatOrDouble()) { + return Riscv64ManagedRegister::FromFRegister(kFArgumentRegisters[itr_float_and_doubles_]); + } else { + size_t num_non_fp_args = itr_args_ - itr_float_and_doubles_; + DCHECK_LT(num_non_fp_args, kMaxIntLikeArgumentRegisters); + return kXArgumentRegisters[num_non_fp_args]; + } + } else { + // This argument is in a GPR, whether it's a FP arg or a non-FP arg. + DCHECK_LT(itr_args_, kMaxFloatOrDoubleArgumentRegisters + kMaxIntLikeArgumentRegisters); + return kXArgumentRegisters[itr_args_ - kMaxFloatOrDoubleArgumentRegisters]; + } +} + +FrameOffset Riscv64JniCallingConvention::CurrentParamStackOffset() { + CHECK(IsCurrentParamOnStack()); + // Account for FP arguments passed through FA0-FA7. + // All other args are passed through A0-A7 (even FP args) and the stack. + size_t num_gpr_and_stack_args = + itr_args_ - std::min<size_t>(kMaxFloatOrDoubleArgumentRegisters, itr_float_and_doubles_); + size_t args_on_stack = + num_gpr_and_stack_args - std::min(kMaxIntLikeArgumentRegisters, num_gpr_and_stack_args); + size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize); + CHECK_LT(offset, OutFrameSize()); + return FrameOffset(offset); +} + +bool Riscv64JniCallingConvention::RequiresSmallResultTypeExtension() const { + // RISC-V native calling convention requires values to be returned the way that the first + // argument would be passed. Arguments are zero-/sign-extended to 32 bits based on their + // type, then sign-extended to 64 bits. This is the same as in the ART mamaged ABI. + // (Not applicable to FP args which are returned in `FA0`. A `float` is NaN-boxed.) + return false; +} + +// T0 is neither managed callee-save, nor argument register. It is suitable for use as the +// locking argument for synchronized methods and hidden argument for @CriticalNative methods. +static void AssertT0IsNeitherCalleeSaveNorArgumentRegister() { + // TODO: Change to static_assert; std::none_of should be constexpr since C++20. 
+ DCHECK(std::none_of(kCalleeSaveRegisters, + kCalleeSaveRegisters + std::size(kCalleeSaveRegisters), + [](ManagedRegister callee_save) constexpr { + return callee_save.Equals(Riscv64ManagedRegister::FromXRegister(T0)); + })); + DCHECK(std::none_of(kXArgumentRegisters, + kXArgumentRegisters + std::size(kXArgumentRegisters), + [](ManagedRegister arg) { return arg.AsRiscv64().AsXRegister() == T0; })); +} + +ManagedRegister Riscv64JniCallingConvention::LockingArgumentRegister() const { + DCHECK(!IsFastNative()); + DCHECK(!IsCriticalNative()); + DCHECK(IsSynchronized()); + AssertT0IsNeitherCalleeSaveNorArgumentRegister(); + return Riscv64ManagedRegister::FromXRegister(T0); +} + +ManagedRegister Riscv64JniCallingConvention::HiddenArgumentRegister() const { + DCHECK(IsCriticalNative()); + AssertT0IsNeitherCalleeSaveNorArgumentRegister(); + return Riscv64ManagedRegister::FromXRegister(T0); +} + +// Whether to use tail call (used only for @CriticalNative). +bool Riscv64JniCallingConvention::UseTailCall() const { + CHECK(IsCriticalNative()); + return OutFrameSize() == 0u; +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/jni/quick/riscv64/calling_convention_riscv64.h b/compiler/jni/quick/riscv64/calling_convention_riscv64.h new file mode 100644 index 0000000000..5add183f72 --- /dev/null +++ b/compiler/jni/quick/riscv64/calling_convention_riscv64.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_ +#define ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_ + +#include "base/enums.h" +#include "base/macros.h" +#include "jni/quick/calling_convention.h" + +namespace art HIDDEN { +namespace riscv64 { + +class Riscv64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { + public: + Riscv64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) + : ManagedRuntimeCallingConvention(is_static, + is_synchronized, + shorty, + PointerSize::k64) {} + ~Riscv64ManagedRuntimeCallingConvention() override {} + // Calling convention + ManagedRegister ReturnRegister() const override; + // Managed runtime calling convention + ManagedRegister MethodRegister() override; + ManagedRegister ArgumentRegisterForMethodExitHook() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + + private: + DISALLOW_COPY_AND_ASSIGN(Riscv64ManagedRuntimeCallingConvention); +}; + +class Riscv64JniCallingConvention final : public JniCallingConvention { + public: + Riscv64JniCallingConvention(bool is_static, + bool is_synchronized, + bool is_fast_native, + bool is_critical_native, + const char* shorty); + ~Riscv64JniCallingConvention() override {} + // Calling convention + ManagedRegister ReturnRegister() const override; + ManagedRegister IntReturnRegister() const override; + // JNI calling convention + size_t FrameSize() const override; + size_t OutFrameSize() const override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ArrayRef<const ManagedRegister> CalleeSaveScratchRegisters() const override; + ArrayRef<const ManagedRegister> ArgumentScratchRegisters() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + size_t CurrentParamSize() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + bool RequiresSmallResultTypeExtension() const override; + + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + ManagedRegister LockingArgumentRegister() const override; + + // Hidden argument register, used to pass the method pointer for @CriticalNative call. + ManagedRegister HiddenArgumentRegister() const override; + + // Whether to use tail call (used only for @CriticalNative). 
+ bool UseTailCall() const override; + + private: + DISALLOW_COPY_AND_ASSIGN(Riscv64JniCallingConvention); +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_ diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 9d0761d2f7..0f981dd6df 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -117,7 +117,7 @@ ArrayRef<const ManagedRegister> X86_64JniCallingConvention::ArgumentScratchRegis return scratch_regs; } -static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni ATTRIBUTE_UNUSED) { +static ManagedRegister ReturnRegisterForShorty(const char* shorty, [[maybe_unused]] bool jni) { if (shorty[0] == 'F' || shorty[0] == 'D') { return X86_64ManagedRegister::FromXmmRegister(XMM0); } else if (shorty[0] == 'J') { diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc index 22b174fce6..6b62874643 100644 --- a/compiler/linker/output_stream_test.cc +++ b/compiler/linker/output_stream_test.cc @@ -107,13 +107,13 @@ TEST_F(OutputStreamTest, BufferedFlush) { flush_called(false) { } ~CheckingOutputStream() override {} - bool WriteFully(const void* buffer ATTRIBUTE_UNUSED, - size_t byte_count ATTRIBUTE_UNUSED) override { + bool WriteFully([[maybe_unused]] const void* buffer, + [[maybe_unused]] size_t byte_count) override { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } - off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) override { + off_t Seek([[maybe_unused]] off_t offset, [[maybe_unused]] Whence whence) override { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index 703584c537..9da2bfb8ef 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -20,7 +20,6 @@ #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "dex/dex_file_exception_helpers.h" -#include "quicken_info.h" namespace art HIDDEN { @@ -40,9 +39,7 @@ HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph, local_allocator->Adapter(kArenaAllocGraphBuilder)), throwing_blocks_(kDefaultNumberOfThrowingBlocks, local_allocator->Adapter(kArenaAllocGraphBuilder)), - number_of_branches_(0u), - quicken_index_for_dex_pc_(std::less<uint32_t>(), - local_allocator->Adapter(kArenaAllocGraphBuilder)) {} + number_of_branches_(0u) {} HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) { return MaybeCreateBlockAt(dex_pc, dex_pc); @@ -147,7 +144,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { HBasicBlock* block = graph_->GetEntryBlock(); graph_->AddBlock(block); - size_t quicken_index = 0; bool is_throwing_block = false; // Calculate the qucikening index here instead of CreateBranchTargets since it's easier to // calculate in dex_pc order. @@ -158,8 +154,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { // Check if this dex_pc address starts a new basic block. HBasicBlock* next_block = GetBlockAt(dex_pc); if (next_block != nullptr) { - // We only need quicken index entries for basic block boundaries. - quicken_index_for_dex_pc_.Put(dex_pc, quicken_index); if (block != nullptr) { // Last instruction did not end its basic block but a new one starts here. // It must have been a block falling through into the next one. 
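A recurring mechanical change throughout this patch set (first visible in calling_convention_x86_64.cc and output_stream_test.cc above) replaces ART's ATTRIBUTE_UNUSED macro, written after the parameter name, with the standard C++17 [[maybe_unused]] attribute written before it. A minimal sketch of the two spellings side by side; the function names and the macro definition are assumptions for illustration only (the real macro lives in ART's base/macros.h):

// Old spelling: compiler-specific macro trailing the parameter name.
#define ATTRIBUTE_UNUSED __attribute__((__unused__))  // assumed definition for this sketch
static bool WriteFullyOld(const void* buffer ATTRIBUTE_UNUSED,
                          size_t byte_count ATTRIBUTE_UNUSED) {
  return false;
}

// New spelling: standard C++17 attribute preceding the parameter.
static bool WriteFullyNew([[maybe_unused]] const void* buffer,
                          [[maybe_unused]] size_t byte_count) {
  return false;
}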
@@ -169,10 +163,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { is_throwing_block = false; graph_->AddBlock(block); } - // Make sure to increment this before the continues. - if (QuickenInfoTable::NeedsIndexForInstruction(&instruction)) { - ++quicken_index; - } if (block == nullptr) { // Ignore dead code. @@ -483,8 +473,4 @@ void HBasicBlockBuilder::BuildIntrinsic() { body->AddSuccessor(exit_block); } -size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const { - return quicken_index_for_dex_pc_.Get(dex_pc); -} - } // namespace art diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h index 8668ef8221..1aa9375e5a 100644 --- a/compiler/optimizing/block_builder.h +++ b/compiler/optimizing/block_builder.h @@ -45,8 +45,6 @@ class HBasicBlockBuilder : public ValueObject { size_t GetNumberOfBranches() const { return number_of_branches_; } HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; } - size_t GetQuickenIndex(uint32_t dex_pc) const; - private: // Creates a basic block starting at given `dex_pc`. HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc); @@ -83,9 +81,6 @@ class HBasicBlockBuilder : public ValueObject { ScopedArenaVector<HBasicBlock*> throwing_blocks_; size_t number_of_branches_; - // A table to quickly find the quicken index for the first instruction of a basic block. - ScopedArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_; - static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u; DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 919abfdc49..c0d4c37659 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1047,14 +1047,14 @@ class BCEVisitor final : public HGraphVisitor { HDiv* div = nullptr; int64_t const_divisor = 0; - if (HMul* mul = instruction->GetRight()->AsMul()) { + if (HMul* mul = instruction->GetRight()->AsMulOrNull()) { if (!mul->GetLeft()->IsDiv() || !mul->GetRight()->IsConstant()) { return false; } div = mul->GetLeft()->AsDiv(); const_divisor = Int64FromConstant(mul->GetRight()->AsConstant()); - } else if (HAdd* add = instruction->GetRight()->AsAdd()) { - HShl* shl = add->GetRight()->AsShl(); + } else if (HAdd* add = instruction->GetRight()->AsAddOrNull()) { + HShl* shl = add->GetRight()->AsShlOrNull(); if (!is_needed_shl(shl)) { return false; } @@ -1070,8 +1070,8 @@ class BCEVisitor final : public HGraphVisitor { return false; } const_divisor = (1LL << n) + 1; - } else if (HSub* sub = instruction->GetRight()->AsSub()) { - HShl* shl = sub->GetLeft()->AsShl(); + } else if (HSub* sub = instruction->GetRight()->AsSubOrNull()) { + HShl* shl = sub->GetLeft()->AsShlOrNull(); if (!is_needed_shl(shl)) { return false; } @@ -1378,8 +1378,7 @@ class BCEVisitor final : public HGraphVisitor { HInstruction* array_length, HInstruction* base, int32_t min_c, int32_t max_c) { - HBoundsCheck* bounds_check = - first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck(); + HBoundsCheck* bounds_check = first_index_bounds_check_map_.Get(array_length->GetId()); // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c], // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1 // and base+3, since we made the assumption any in between value may occur too. 
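The bounds-check-elimination hunks above also switch from the plain As<Kind>() accessors to the new As<Kind>OrNull() forms (AsMulOrNull, AsAddOrNull, AsSubOrNull, AsShlOrNull, AsIntConstantOrNull). Judging from the call sites, the OrNull variant returns nullptr when the instruction is not of the requested kind, while the plain accessor now expects the kind to already hold; this is an inference from the diff, not a statement of the final nodes.h API. A self-contained sketch of that split:

#include <cassert>

struct HMul;

struct HInstruction {
  virtual ~HInstruction() {}
  virtual bool IsMul() const { return false; }
  HMul* AsMulOrNull();   // probe form: nullptr when this is not an HMul
  HMul* AsMul();         // checked form: asserts the kind before downcasting
};

struct HMul : HInstruction {
  bool IsMul() const override { return true; }
};

HMul* HInstruction::AsMulOrNull() {
  return IsMul() ? static_cast<HMul*>(this) : nullptr;
}

HMul* HInstruction::AsMul() {
  assert(IsMul());
  return static_cast<HMul*>(this);
}

// Usage mirroring the rewritten BCE code: probe without asserting.
bool RightOperandIsMul(HInstruction* right) {
  if (HMul* mul = right->AsMulOrNull()) {
    (void)mul;
    return true;
  }
  return false;
}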
diff --git a/compiler/optimizing/code_generation_data.cc b/compiler/optimizing/code_generation_data.cc new file mode 100644 index 0000000000..7b23d46dc5 --- /dev/null +++ b/compiler/optimizing/code_generation_data.cc @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "class_linker.h" +#include "code_generation_data.h" +#include "code_generator.h" +#include "intern_table.h" +#include "mirror/object-inl.h" +#include "runtime.h" + +namespace art HIDDEN { + +void CodeGenerationData::EmitJitRoots( + /*out*/std::vector<Handle<mirror::Object>>* roots) { + DCHECK(roots->empty()); + roots->reserve(GetNumberOfJitRoots()); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + size_t index = 0; + for (auto& entry : jit_string_roots_) { + // Update the `roots` with the string, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsString()); + entry.second = index; + // Ensure the string is strongly interned. This is a requirement on how the JIT + // handles strings. b/32995596 + class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); + ++index; + } + for (auto& entry : jit_class_roots_) { + // Update the `roots` with the class, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsClass()); + entry.second = index; + ++index; + } +} + +} // namespace art diff --git a/compiler/optimizing/code_generation_data.h b/compiler/optimizing/code_generation_data.h new file mode 100644 index 0000000000..e78ba8f574 --- /dev/null +++ b/compiler/optimizing/code_generation_data.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ + +#include <memory> + +#include "arch/instruction_set.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "code_generator.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" +#include "handle.h" +#include "mirror/class.h" +#include "mirror/object.h" +#include "mirror/string.h" +#include "stack_map_stream.h" + +namespace art HIDDEN { + +class CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { + public: + static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, + InstructionSet instruction_set) { + ScopedArenaAllocator allocator(arena_stack); + void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); + return std::unique_ptr<CodeGenerationData>( + ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); + } + + ScopedArenaAllocator* GetScopedAllocator() { + return &allocator_; + } + + void AddSlowPath(SlowPathCode* slow_path) { + slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); + } + + ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { + return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); + } + + StackMapStream* GetStackMapStream() { return &stack_map_stream_; } + + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { + jit_string_roots_.Overwrite(string_reference, + reinterpret_cast64<uint64_t>(string.GetReference())); + } + + uint64_t GetJitStringRootIndex(StringReference string_reference) const { + return jit_string_roots_.Get(string_reference); + } + + size_t GetNumberOfJitStringRoots() const { + return jit_string_roots_.size(); + } + + void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { + jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); + } + + uint64_t GetJitClassRootIndex(TypeReference type_reference) const { + return jit_class_roots_.Get(type_reference); + } + + size_t GetNumberOfJitClassRoots() const { + return jit_class_roots_.size(); + } + + size_t GetNumberOfJitRoots() const { + return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); + } + + void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) + : allocator_(std::move(allocator)), + stack_map_stream_(&allocator_, instruction_set), + slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_string_roots_(StringReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_class_roots_(TypeReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)) { + slow_paths_.reserve(kDefaultSlowPathsCapacity); + } + + static constexpr size_t kDefaultSlowPathsCapacity = 8; + + ScopedArenaAllocator allocator_; + StackMapStream stack_map_stream_; + ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; + + // Maps a StringReference (dex_file, string_index) to the index in the literal table. + // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; + + // Maps a ClassReference (dex_file, type_index) to the index in the literal table. 
+ // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c9f42b52f5..404a42771f 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -44,6 +44,7 @@ #include "base/leb128.h" #include "class_linker.h" #include "class_root-inl.h" +#include "code_generation_data.h" #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "graph_visualizer.h" @@ -141,122 +142,6 @@ static bool CheckTypeConsistency(HInstruction* instruction) { return true; } -class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { - public: - static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, - InstructionSet instruction_set) { - ScopedArenaAllocator allocator(arena_stack); - void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); - return std::unique_ptr<CodeGenerationData>( - ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); - } - - ScopedArenaAllocator* GetScopedAllocator() { - return &allocator_; - } - - void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); - } - - ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { - return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); - } - - StackMapStream* GetStackMapStream() { return &stack_map_stream_; } - - void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { - jit_string_roots_.Overwrite(string_reference, - reinterpret_cast64<uint64_t>(string.GetReference())); - } - - uint64_t GetJitStringRootIndex(StringReference string_reference) const { - return jit_string_roots_.Get(string_reference); - } - - size_t GetNumberOfJitStringRoots() const { - return jit_string_roots_.size(); - } - - void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { - jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); - } - - uint64_t GetJitClassRootIndex(TypeReference type_reference) const { - return jit_class_roots_.Get(type_reference); - } - - size_t GetNumberOfJitClassRoots() const { - return jit_class_roots_.size(); - } - - size_t GetNumberOfJitRoots() const { - return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); - } - - void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) - REQUIRES_SHARED(Locks::mutator_lock_); - - private: - CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) - : allocator_(std::move(allocator)), - stack_map_stream_(&allocator_, instruction_set), - slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), - jit_string_roots_(StringReferenceValueComparator(), - allocator_.Adapter(kArenaAllocCodeGenerator)), - jit_class_roots_(TypeReferenceValueComparator(), - allocator_.Adapter(kArenaAllocCodeGenerator)) { - slow_paths_.reserve(kDefaultSlowPathsCapacity); - } - - static constexpr size_t kDefaultSlowPathsCapacity = 8; - - ScopedArenaAllocator allocator_; - StackMapStream stack_map_stream_; - ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; - - // Maps a StringReference (dex_file, string_index) to the 
index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; - - // Maps a ClassReference (dex_file, type_index) to the index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; -}; - -void CodeGenerator::CodeGenerationData::EmitJitRoots( - /*out*/std::vector<Handle<mirror::Object>>* roots) { - DCHECK(roots->empty()); - roots->reserve(GetNumberOfJitRoots()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - size_t index = 0; - for (auto& entry : jit_string_roots_) { - // Update the `roots` with the string, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); - DCHECK(roots->back() != nullptr); - DCHECK(roots->back()->IsString()); - entry.second = index; - // Ensure the string is strongly interned. This is a requirement on how the JIT - // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); - ++index; - } - for (auto& entry : jit_class_roots_) { - // Update the `roots` with the class, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); - DCHECK(roots->back() != nullptr); - DCHECK(roots->back()->IsClass()); - entry.second = index; - ++index; - } -} - ScopedArenaAllocator* CodeGenerator::GetScopedAllocator() { DCHECK(code_generation_data_ != nullptr); return code_generation_data_->GetScopedAllocator(); @@ -288,8 +173,8 @@ uint64_t CodeGenerator::GetJitClassRootIndex(TypeReference type_reference) { return code_generation_data_->GetJitClassRootIndex(type_reference); } -void CodeGenerator::EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED, - const uint8_t* roots_data ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitJitRootPatches([[maybe_unused]] uint8_t* code, + [[maybe_unused]] const uint8_t* roots_data) { DCHECK(code_generation_data_ != nullptr); DCHECK_EQ(code_generation_data_->GetNumberOfJitStringRoots(), 0u); DCHECK_EQ(code_generation_data_->GetNumberOfJitClassRoots(), 0u); @@ -378,7 +263,7 @@ void CodeGenerator::InitializeCodeGenerationData() { code_generation_data_ = CodeGenerationData::Create(graph_->GetArenaStack(), GetInstructionSet()); } -void CodeGenerator::Compile(CodeAllocator* allocator) { +void CodeGenerator::Compile() { InitializeCodeGenerationData(); // The register allocator already called `InitializeCodeGeneration`, @@ -394,7 +279,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { fpu_spill_mask_, GetGraph()->GetNumberOfVRegs(), GetGraph()->IsCompilingBaseline(), - GetGraph()->IsDebuggable()); + GetGraph()->IsDebuggable(), + GetGraph()->HasShouldDeoptimizeFlag()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); @@ -443,32 +329,28 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { } // Finalize instructions in assember; - Finalize(allocator); + Finalize(); GetStackMapStream()->EndMethod(GetAssembler()->CodeSize()); } -void CodeGenerator::Finalize(CodeAllocator* allocator) { - size_t code_size = GetAssembler()->CodeSize(); - 
uint8_t* buffer = allocator->Allocate(code_size); - - MemoryRegion code(buffer, code_size); - GetAssembler()->FinalizeInstructions(code); +void CodeGenerator::Finalize() { + GetAssembler()->FinalizeCode(); } void CodeGenerator::EmitLinkerPatches( - ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) { + [[maybe_unused]] ArenaVector<linker::LinkerPatch>* linker_patches) { // No linker patches by default. } -bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { +bool CodeGenerator::NeedsThunkCode([[maybe_unused]] const linker::LinkerPatch& patch) const { // Code generators that create patches requiring thunk compilation should override this function. return false; } -void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, - /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] /*out*/ ArenaVector<uint8_t>* code, + [[maybe_unused]] /*out*/ std::string* debug_name) { // Code generators that create patches requiring thunk compilation should override this function. LOG(FATAL) << "Unexpected call to EmitThunkCode()."; } @@ -745,8 +627,8 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( locations->SetOut(calling_convention.GetReturnLocation(field_type)); } } else { - size_t set_index = is_instance ? 1 : 0; - if (DataType::IsFloatingPointType(field_type)) { + size_t set_index = is_instance ? 1 : 0; + if (DataType::IsFloatingPointType(field_type)) { // The set value comes from a float location while the calling convention // expects it in a regular register location. Allocate a temp for it and // make the transfer at codegen. @@ -1028,6 +910,12 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: { + return std::unique_ptr<CodeGenerator>( + new (allocator) riscv64::CodeGeneratorRISCV64(graph, compiler_options, stats)); + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( @@ -1834,8 +1722,8 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -1845,7 +1733,7 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { } // The length must be >= 0. 
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 9872efaa4a..cd44fb3fa7 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -59,8 +59,12 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); -static const ReadBarrierOption gCompilerReadBarrierOption = - gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; +// Depending on configuration, `gUseReadBarrier` can be a static const variable. +// Static variable initialization order across different compilation units is not defined, +// so function is used instead of static variable `gCompilerReadBarrierOption`. +inline ReadBarrierOption GetCompilerReadBarrierOption() { + return gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; +} constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); constexpr size_t status_byte_offset = @@ -73,6 +77,7 @@ constexpr uint32_t shifted_initialized_value = enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); class Assembler; +class CodeGenerationData; class CodeGenerator; class CompilerOptions; class StackMapStream; @@ -82,18 +87,6 @@ namespace linker { class LinkerPatch; } // namespace linker -class CodeAllocator { - public: - CodeAllocator() {} - virtual ~CodeAllocator() {} - - virtual uint8_t* Allocate(size_t size) = 0; - virtual ArrayRef<const uint8_t> GetMemory() const = 0; - - private: - DISALLOW_COPY_AND_ASSIGN(CodeAllocator); -}; - class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { public: explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { @@ -200,7 +193,7 @@ class FieldAccessCallingConvention { class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { public: // Compiles the graph to executable instructions. 
- void Compile(CodeAllocator* allocator); + void Compile(); static std::unique_ptr<CodeGenerator> Create(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); @@ -221,7 +214,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } virtual void Initialize() = 0; - virtual void Finalize(CodeAllocator* allocator); + virtual void Finalize(); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; virtual void EmitThunkCode(const linker::LinkerPatch& patch, @@ -278,14 +271,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; } - static uint32_t ComputeRegisterMask(const int* registers, size_t length) { - uint32_t mask = 0; - for (size_t i = 0, e = length; i < e; ++i) { - mask |= (1 << registers[i]); - } - return mask; - } - virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; @@ -731,6 +716,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); static ScaleFactor ScaleFactorForType(DataType::Type type); + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(), + GetAssembler().CodeSize()); + } + protected: // Patch info used for recording locations of required linker patches and their targets, // i.e. target method, string, type or code identified by their dex file and index, @@ -761,6 +751,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; + template <typename RegType> + static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) { + uint32_t mask = 0; + for (size_t i = 0, e = length; i < e; ++i) { + mask |= (1 << registers[i]); + } + return mask; + } + // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. 
uint32_t GetFpuSpillStart() const { @@ -814,6 +813,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { StackMapStream* GetStackMapStream(); + CodeGenerationData* GetCodeGenerationData() { + return code_generation_data_.get(); + } + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); uint64_t GetJitStringRootIndex(StringReference string_reference); void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); @@ -848,8 +851,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { DisassemblyInformation* disasm_info_; private: - class CodeGenerationData; - void InitializeCodeGenerationData(); size_t GetStackOffsetOfSavedRegister(size_t index); void GenerateSlowPaths(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 41db9a2542..89172aaebc 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -35,6 +35,7 @@ #include "interpreter/mterp/nterp.h" #include "intrinsics.h" #include "intrinsics_arm64.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "linker/linker_patch.h" #include "lock_word.h" @@ -45,6 +46,7 @@ #include "optimizing/common_arm64.h" #include "optimizing/nodes.h" #include "thread.h" +#include "trace.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -88,6 +90,9 @@ using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; using helpers::XRegisterFrom; +// TODO(mythria): Expand SystemRegister in vixl to include this value. +uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value; + // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. @@ -936,6 +941,7 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -950,15 +956,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -995,14 +999,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint64_literals_(std::less<uint64_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(StringReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_patches_(&assembler_, graph->GetAllocator()), jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. @@ -1036,7 +1033,7 @@ void CodeGeneratorARM64::EmitJumpTables() { } } -void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { +void CodeGeneratorARM64::Finalize() { EmitJumpTables(); // Emit JIT baker read barrier slow paths. @@ -1051,11 +1048,11 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { // Ensure we emit the literal pool. __ FinalizeCode(); - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { - ArrayRef<const uint8_t> code = allocator->GetMemory(); + ArrayRef<const uint8_t> code(GetCode()); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); @@ -1192,8 +1189,9 @@ void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) { void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) { MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireX(); - Register value = temps.AcquireW(); + Register addr = temps.AcquireX(); + Register index = temps.AcquireX(); + Register value = index.W(); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); @@ -1213,9 +1211,44 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); - __ Mov(temp, address + offset.Int32Value()); - __ Ldrb(value, MemOperand(temp, 0)); - __ Cbnz(value, slow_path->GetEntryLabel()); + __ Mov(addr, address + offset.Int32Value()); + __ Ldrb(value, MemOperand(addr, 0)); + __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any method entry / exit listeners. If no, continue. + __ B(lt, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. 
+ __ B(gt, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take slow path. + uint32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value(); + __ Ldr(index, MemOperand(tr, trace_buffer_index_offset)); + __ Subs(index, index, kNumEntriesForWallClock); + __ B(lt, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Str(index, MemOperand(tr, trace_buffer_index_offset)); + // Calculate the entry address in the buffer. + // addr = base_addr + sizeof(void*) * index; + __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue())); + __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8)); + + Register tmp = index; + // Record method pointer and trace action. + __ Ldr(tmp, MemOperand(sp, 0)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes)); + // Record the timestamp. + __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0); + __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes)); __ Bind(slow_path->GetExitLabel()); } @@ -1264,7 +1297,7 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); Register counter = temps.AcquireW(); - __ Ldr(temp, DeduplicateUint64Literal(address)); + __ Ldr(temp, jit_patches_.DeduplicateUint64Literal(address)); __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Cbz(counter, slow_path->GetEntryLabel()); __ Add(counter, counter, -1); @@ -1532,15 +1565,15 @@ size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_ return kArm64WordSize; } -size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); } -size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); @@ -3647,7 +3680,7 @@ void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorARM64::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. 
} @@ -3655,8 +3688,7 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {} void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = @@ -3664,7 +3696,7 @@ void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -3747,7 +3779,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct // The condition instruction has been materialized, compare the output to 0. Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - if (true_target == nullptr) { + if (true_target == nullptr) { __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); } else { __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); @@ -3876,7 +3908,7 @@ static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { } static inline Condition GetConditionForSelect(HCondition* condition) { - IfCondition cond = condition->AsCondition()->GetCondition(); + IfCondition cond = condition->GetCondition(); return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias()) : ARM64Condition(cond); } @@ -3888,8 +3920,8 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) { locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); - HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); + HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull(); + HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull(); bool is_true_value_constant = cst_true_value != nullptr; bool is_false_value_constant = cst_false_value != nullptr; // Ask VIXL whether we should synthesize constants in registers. @@ -4308,7 +4340,6 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::RequiresRegister()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4478,12 +4509,11 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); // Loop through the iftable and check if any class matches. 
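The GenerateMethodEntryExitHook fast path added in the hunks above avoids the slow path when only "fast" trace listeners are installed: it decrements a per-thread trace-buffer index by kNumEntriesForWallClock, bails out to the slow path if the buffer is full, and otherwise stores the method pointer (with the trace action or-ed into its low bits) and a CNTVCT_EL0 timestamp into the reserved slots. A host-side C++ sketch of that bookkeeping; the struct, field names, and two-word entry layout are assumptions taken from the comments in the generated code, not the runtime's actual Thread layout:

#include <cstdint>
#include <cstddef>

enum class TraceAction : uintptr_t { kTraceMethodEnter = 0, kTraceMethodExit = 1 };

constexpr intptr_t kNumEntriesForWallClock = 2;  // method word + timestamp word (assumed)

struct ThreadTraceBuffer {
  uintptr_t* base;  // analogue of Thread::TraceBufferPtrOffset
  intptr_t index;   // analogue of Thread::TraceBufferIndexOffset; counts down towards 0
};

// Returns false when the buffer has no room, which corresponds to taking the slow path.
bool RecordMethodEvent(ThreadTraceBuffer* buf,
                       uintptr_t method,  // ArtMethod*, at least 4-byte aligned
                       TraceAction action,
                       uint64_t timestamp) {
  intptr_t new_index = buf->index - kNumEntriesForWallClock;
  if (new_index < 0) {
    return false;  // no space left; the slow path handles flushing
  }
  buf->index = new_index;
  uintptr_t* entry = buf->base + new_index;
  // The low two bits of the aligned method pointer are free, so the action fits there;
  // for a method entry the bits are already zero and no or-ing is needed.
  entry[0] = method | static_cast<uintptr_t>(action);
  entry[1] = static_cast<uintptr_t>(timestamp);  // CNTVCT_EL0 in the emitted arm64 code
  return true;
}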
@@ -4525,7 +4555,7 @@ void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -4534,7 +4564,7 @@ void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -4709,8 +4739,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -4749,7 +4779,8 @@ void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HIn case MethodLoadKind::kJitDirectAddress: { // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), - DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()))); + jit_patches_.DeduplicateUint64Literal( + reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()))); break; } case MethodLoadKind::kRuntimeCall: { @@ -4775,14 +4806,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); break; } - case MethodLoadKind::kRecursive: { + case MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex()); break; - } - case MethodLoadKind::kRuntimeCall: { + case MethodLoadKind::kRuntimeCall: GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. - } case MethodLoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { @@ -4798,10 +4827,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( break; } FALLTHROUGH_INTENDED; - default: { + default: LoadMethod(invoke->GetMethodLoadKind(), temp, invoke); break; - } } auto call_lr = [&]() { @@ -4906,6 +4934,7 @@ void CodeGeneratorARM64::GenerateVirtualCall( } // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an // intermediate/temporary reference and because the current // concurrent copying collector keeps the from-space memory // intact/accessible until the end of the marking phase (the @@ -5090,25 +5119,8 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( return label; } -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( - uint64_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( - const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { - ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); - return jit_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( - const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { - ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); - return jit_class_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); +void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData()); } void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, @@ -5332,19 +5344,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, assembler.FinalizeCode(); code->resize(assembler.CodeSize()); MemoryRegion code_region(code->data(), code->size()); - assembler.FinalizeInstructions(code_region); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { - return uint32_literals_.GetOrCreate( - value, - [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); -} - -vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { - return uint64_literals_.GetOrCreate( - value, - [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); + assembler.CopyInstructions(code_region); } void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -5370,13 +5370,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { return; } - { - // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there - // are no pools emitted. 
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } @@ -5434,7 +5429,9 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); @@ -5460,9 +5457,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5600,7 +5596,7 @@ void LocationsBuilderARM64::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) { __ Str(wzr, GetExceptionTlsAddress()); } @@ -5685,7 +5681,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD temp, /* offset placeholder */ 0u, ldr_label, - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5709,14 +5705,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD out.X(), /* offset= */ 0, /* fixup_label= */ nullptr, - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); @@ -5730,7 +5725,7 @@ void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. 
} @@ -5930,7 +5925,7 @@ void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { HandleBinaryOp(instruction); } -void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5957,7 +5952,7 @@ void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorARM64::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -5968,7 +5963,7 @@ void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorARM64::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. } @@ -5980,7 +5975,7 @@ void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -6175,7 +6170,7 @@ void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor } void InstructionCodeGeneratorARM64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -6215,7 +6210,7 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { instruction->SetLocations(nullptr); } -void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) { codegen_->GenerateFrameExit(); } @@ -6353,6 +6348,9 @@ void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). + // Note that only a suspend check can see live SIMD registers. In the + // loop optimization, we make sure this does not happen for any other slow + // path. locations->SetCustomSlowPathCallerSaves( GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); } @@ -6467,12 +6465,12 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { HandleBinaryOp(instruction); } -void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; } @@ -7018,32 +7016,6 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi } } -static void PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - vixl::aarch64::Literal<uint32_t>* literal, - uint64_t index_in_table) { - uint32_t literal_offset = literal->GetOffset(); - uintptr_t address = - reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - uint8_t* data = code + literal_offset; - reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); -} - -void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { - for (const auto& entry : jit_string_patches_) { - const StringReference& string_reference = entry.first; - vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitStringRootIndex(string_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } - for (const auto& entry : jit_class_patches_) { - const TypeReference& type_reference = entry.first; - vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitClassRootIndex(type_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } -} - MemOperand InstructionCodeGeneratorARM64::VecNEONAddress( HVecMemoryOperation* instruction, UseScratchRegisterScope* temps_scope, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 6190364d1d..957f85aa21 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -26,6 +26,7 @@ #include "dex/string_reference.h" #include "dex/type_reference.h" #include "driver/compiler_options.h" +#include "jit_patches_arm64.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" @@ -50,30 +51,29 @@ class CodeGeneratorARM64; // Use a local definition to prevent copying mistakes. static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize); -// These constants are used as an approximate margin when emission of veneer and literal pools +// This constant is used as an approximate margin when emission of veneer and literal pools // must be blocked. 
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize; -static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes; static const vixl::aarch64::Register kParameterCoreRegisters[] = { - vixl::aarch64::x1, - vixl::aarch64::x2, - vixl::aarch64::x3, - vixl::aarch64::x4, - vixl::aarch64::x5, - vixl::aarch64::x6, - vixl::aarch64::x7 + vixl::aarch64::x1, + vixl::aarch64::x2, + vixl::aarch64::x3, + vixl::aarch64::x4, + vixl::aarch64::x5, + vixl::aarch64::x6, + vixl::aarch64::x7 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static const vixl::aarch64::VRegister kParameterFPRegisters[] = { - vixl::aarch64::d0, - vixl::aarch64::d1, - vixl::aarch64::d2, - vixl::aarch64::d3, - vixl::aarch64::d4, - vixl::aarch64::d5, - vixl::aarch64::d6, - vixl::aarch64::d7 + vixl::aarch64::d0, + vixl::aarch64::d1, + vixl::aarch64::d2, + vixl::aarch64::d3, + vixl::aarch64::d4, + vixl::aarch64::d5, + vixl::aarch64::d6, + vixl::aarch64::d7 }; static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); @@ -116,7 +116,7 @@ const vixl::aarch64::CPURegList callee_saved_core_registers( vixl::aarch64::CPURegister::kRegister, vixl::aarch64::kXRegSize, (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()), - vixl::aarch64::x30.GetCode()); + vixl::aarch64::x30.GetCode()); const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister, vixl::aarch64::kDRegSize, vixl::aarch64::d8.GetCode(), @@ -192,34 +192,34 @@ class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> { DISALLOW_COPY_AND_ASSIGN(JumpTableARM64); }; -static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = - { vixl::aarch64::x0, - vixl::aarch64::x1, - vixl::aarch64::x2, - vixl::aarch64::x3, - vixl::aarch64::x4, - vixl::aarch64::x5, - vixl::aarch64::x6, - vixl::aarch64::x7 }; +static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = { + vixl::aarch64::x0, + vixl::aarch64::x1, + vixl::aarch64::x2, + vixl::aarch64::x3, + vixl::aarch64::x4, + vixl::aarch64::x5, + vixl::aarch64::x6, + vixl::aarch64::x7 +}; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = - { vixl::aarch64::d0, - vixl::aarch64::d1, - vixl::aarch64::d2, - vixl::aarch64::d3, - vixl::aarch64::d4, - vixl::aarch64::d5, - vixl::aarch64::d6, - vixl::aarch64::d7 }; +static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = { + vixl::aarch64::d0, + vixl::aarch64::d1, + vixl::aarch64::d2, + vixl::aarch64::d3, + vixl::aarch64::d4, + vixl::aarch64::d5, + vixl::aarch64::d6, + vixl::aarch64::d7 +}; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterCoreRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register, vixl::aarch64::VRegister> { public: - static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); - InvokeRuntimeCallingConvention() : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, @@ -304,16 +304,16 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + 
Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, + Location GetSetValueLocation([[maybe_unused]] DataType::Type type, bool is_instance) const override { return is_instance ? helpers::LocationFrom(vixl::aarch64::x2) : helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return helpers::LocationFrom(vixl::aarch64::d0); } @@ -551,12 +551,31 @@ class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 { // register size (full SIMD register is used). void ValidateVectorLength(HVecOperation* instr) const; - // Returns default predicate register which is used as governing vector predicate - // to implement predicated loop execution. + vixl::aarch64::PRegister GetVecGoverningPReg(HVecOperation* instr) { + return GetVecPredSetFixedOutPReg(instr->GetGoverningPredicate()); + } + + // Returns a fixed p-reg for a predicate setting instruction. + // + // Currently we only support diamond CF loops for predicated vectorization; also we don't have + // register allocator support for vector predicates. Thus we use fixed P-regs for loop main, + // True and False predicates as a temporary solution. // - // TODO: This is a hack to be addressed when register allocator supports SIMD types. - static vixl::aarch64::PRegister LoopPReg() { - return vixl::aarch64::p0; + // TODO: Support SIMD types and registers in ART. + static vixl::aarch64::PRegister GetVecPredSetFixedOutPReg(HVecPredSetOperation* instr) { + if (instr->IsVecPredWhile() || instr->IsVecPredSetAll()) { + // VecPredWhile and VecPredSetAll live ranges never overlap due to the current vectorization + // scheme: the former is only live inside a vectorized loop and the latter is never in a + // loop and never spans across loops. + return vixl::aarch64::p0; + } else if (instr->IsVecPredNot()) { + // This relies on the fact that we only use PredNot manually in the autovectorizer, + // so there is only one of them in each loop. + return vixl::aarch64::p1; + } else { + DCHECK(instr->IsVecCondition()); + return vixl::aarch64::p2; + } } }; @@ -698,7 +717,7 @@ class CodeGeneratorARM64 : public CodeGenerator { return jump_tables_.back().get(); } - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; // Code generation helpers. void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant); @@ -737,9 +756,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { - return false; - } + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. @@ -838,13 +855,21 @@ class CodeGeneratorARM64 : public CodeGenerator { // the associated patch for AOT or slow path for JIT.
void EmitBakerReadBarrierCbnz(uint32_t custom_data); - vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address) { + return jit_patches_.DeduplicateBootImageAddressLiteral(address); + } vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, - Handle<mirror::String> handle); + Handle<mirror::String> handle) { + return jit_patches_.DeduplicateJitStringLiteral( + dex_file, string_index, handle, GetCodeGenerationData()); + } vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file, - dex::TypeIndex string_index, - Handle<mirror::Class> handle); + dex::TypeIndex class_index, + Handle<mirror::Class> handle) { + return jit_patches_.DeduplicateJitClassLiteral( + dex_file, class_index, handle, GetCodeGenerationData()); + } void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg); void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, @@ -1074,18 +1099,6 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t encoded_data, /*out*/ std::string* debug_name); - using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; - using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; - using StringToLiteralMap = ArenaSafeMap<StringReference, - vixl::aarch64::Literal<uint32_t>*, - StringReferenceValueComparator>; - using TypeToLiteralMap = ArenaSafeMap<TypeReference, - vixl::aarch64::Literal<uint32_t>*, - TypeReferenceValueComparator>; - - vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); - vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> { @@ -1158,14 +1171,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; - // Deduplication map for 32-bit literals, used for JIT for boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // Deduplication map for 64-bit literals, used for JIT for method address or method code. - Uint64ToLiteralMap uint64_literals_; - // Patches for string literals in JIT compiled code. - StringToLiteralMap jit_string_patches_; - // Patches for class literals in JIT compiled code. - TypeToLiteralMap jit_class_patches_; + JitPatchesARM64 jit_patches_; // Baker read barrier slow paths, mapping custom data (uint32_t) to label. 
// Wrap the label to work around vixl::aarch64::Label being non-copyable diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index d69e77045b..78bf316c17 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -33,6 +33,7 @@ #include "interpreter/mterp/nterp.h" #include "intrinsics.h" #include "intrinsics_arm_vixl.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" @@ -40,6 +41,7 @@ #include "mirror/var_handle.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/assembler.h" @@ -1102,27 +1104,27 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { } // Saves the register in the stack. Returns the size taken on stack. -size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } // Restores the register from the stack. Returns the size taken on stack. -size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } -size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } -size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } @@ -1908,6 +1910,7 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1922,15 +1925,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) \ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -2024,7 +2025,7 @@ void CodeGeneratorARMVIXL::FixJumpTables() { #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT -void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { +void CodeGeneratorARMVIXL::Finalize() { FixJumpTables(); // Emit JIT baker read barrier slow paths. 
@@ -2037,11 +2038,11 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { } GetAssembler()->FinalizeCode(); - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { - ArrayRef<const uint8_t> code = allocator->GetMemory(); + ArrayRef<const uint8_t> code(GetCode()); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); @@ -2188,11 +2189,16 @@ void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType())); + // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to + // compute the address to store the timestamp counter. + locations->AddRegisterTemps(3); } void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register addr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register value = RegisterFrom(locations->GetTemp(1)); + vixl32::Register tmp = RegisterFrom(locations->GetTemp(2)); SlowPathCodeARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); @@ -2204,20 +2210,61 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is // disabled in debuggable runtime. The other bit is used when this method itself requires a // deoptimization due to redefinition. So it is safe to just check for non-zero value here. - GetAssembler()->LoadFromOffset(kLoadWord, - temp, - sp, - codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + GetAssembler()->LoadFromOffset( + kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); + __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel()); } MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); - __ Mov(temp, address + offset.Int32Value()); - __ Ldrb(temp, MemOperand(temp, 0)); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + __ Mov(addr, address + offset.Int32Value()); + __ Ldrb(value, MemOperand(addr, 0)); + __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners); + // Check if there are any trace method entry / exit listeners. If no, continue. + __ B(lt, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ B(gt, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take slow path. 
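// [Editor's illustration, not part of the patch] The fast path emitted below behaves roughly
// like the following sketch; `trace_buffer_index_`, `trace_buffer_ptr_` and
// `ReadTimestampCounter()` are only illustrative names for the Thread fields and the 64-bit
// counter read (MRRC) used by the generated code:
//
//   int32_t index = self->trace_buffer_index_ - kNumEntriesForWallClock;
//   if (index < 0) goto slow_path;                      // no room left in the buffer
//   self->trace_buffer_index_ = index;
//   uint32_t* entry = self->trace_buffer_ptr_ + index;  // 4-byte slots on 32-bit ARM
//   // Slot at kMethodOffsetInBytes: the ArtMethod* (reloaded from SP) with the trace
//   // action encoded in its low bits (0 for entry, kTraceMethodExit for exit).
//   entry[kMethodOffsetInBytes / 4] =
//       method_ptr | (is_exit ? enum_cast<uint32_t>(TraceAction::kTraceMethodExit) : 0u);
//   // Slots at kTimestampOffsetInBytes / kHighTimestampOffsetInBytes: low and high words
//   // of the 64-bit timestamp.
//   uint64_t ts = ReadTimestampCounter();
//   entry[kTimestampOffsetInBytes / 4] = static_cast<uint32_t>(ts);
//   entry[kHighTimestampOffsetInBytes / 4] = static_cast<uint32_t>(ts >> 32);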
+ uint32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kArmPointerSize>().Int32Value(); + vixl32::Register index = value; + __ Ldr(index, MemOperand(tr, trace_buffer_index_offset)); + __ Subs(index, index, kNumEntriesForWallClock); + __ B(lt, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Str(index, MemOperand(tr, trace_buffer_index_offset)); + // Calculate the entry address in the buffer. + // addr = base_addr + sizeof(void*) * index + __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue())); + __ Add(addr, addr, Operand(index, LSL, TIMES_4)); + + // Record method pointer and trace action. + __ Ldr(tmp, MemOperand(sp, 0)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes)); + + vixl32::Register tmp1 = index; + // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34. + __ Mrrc(/* lower 32-bit */ tmp, + /* higher 32-bit */ tmp1, + /* coproc= */ 15, + /* opc1= */ 1, + /* crm= */ 14); + static_assert(kHighTimestampOffsetInBytes == + kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize)); + __ Strd(tmp, tmp1, MemOperand(addr, kTimestampOffsetInBytes)); __ Bind(slow_path->GetExitLabel()); } @@ -2228,7 +2275,11 @@ void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instr } void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to + // compute the address to store the timestamp counter. + locations->AddRegisterTemps(3); } void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) { @@ -2824,8 +2875,7 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {} void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target, @@ -3422,7 +3472,7 @@ void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. 
} @@ -3432,7 +3482,7 @@ void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -3442,7 +3492,7 @@ void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -3453,7 +3503,7 @@ void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { } void InstructionCodeGeneratorARMVIXL::VisitFloatConstant( - HFloatConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -3464,7 +3514,7 @@ void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -3473,7 +3523,7 @@ void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* construct } void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -3489,7 +3539,7 @@ void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -5617,7 +5667,7 @@ void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) } void InstructionCodeGeneratorARMVIXL::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -5628,7 +5678,7 @@ void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. 
} @@ -5769,7 +5819,7 @@ void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -6104,8 +6154,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode) { DCHECK(!DataType::IsFloatingPointType(constant->GetType())); - if (constant->IsConstant() && - CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { + if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -7234,7 +7283,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, } } -void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -7604,7 +7653,9 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); @@ -7631,9 +7682,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(cls); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -7887,7 +7937,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); // All aligned loads are implicitly atomic consume operations on ARM. codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/0, GetCompilerReadBarrierOption()); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7908,14 +7958,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetString())); // /* GcRoot<mirror::String> */ out = *out codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/0, GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. 
DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConventionARMVIXL calling_convention; __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); @@ -7944,7 +7993,7 @@ void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); __ Mov(temp, 0); @@ -8490,12 +8539,11 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset)); // Loop through the iftable and check if any class matches. @@ -9828,7 +9876,7 @@ void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, assembler.FinalizeCode(); code->resize(assembler.CodeSize()); MemoryRegion code_region(code->data(), code->size()); - assembler.FinalizeInstructions(code_region); + assembler.CopyInstructions(code_region); } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( @@ -9867,12 +9915,12 @@ void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulat } } -void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index f5abe6951a..0175448fde 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -620,7 +620,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { block_labels_.resize(GetGraph()->GetBlocks().size()); } - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64; diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc new file mode 100644 index 0000000000..7f23730143 --- /dev/null +++ b/compiler/optimizing/code_generator_riscv64.cc @@ -0,0 +1,6494 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_riscv64.h" + +#include "android-base/logging.h" +#include "android-base/macros.h" +#include "arch/riscv64/jni_frame_riscv64.h" +#include "arch/riscv64/registers_riscv64.h" +#include "base/arena_containers.h" +#include "base/macros.h" +#include "code_generator_utils.h" +#include "dwarf/register.h" +#include "heap_poisoning.h" +#include "intrinsics_list.h" +#include "intrinsics_riscv64.h" +#include "jit/profiling_info.h" +#include "linker/linker_patch.h" +#include "mirror/class-inl.h" +#include "optimizing/nodes.h" +#include "stack_map_stream.h" +#include "utils/label.h" +#include "utils/riscv64/assembler_riscv64.h" +#include "utils/stack_checks.h" + +namespace art HIDDEN { +namespace riscv64 { + +// Placeholder values embedded in instructions, patched at link time. +constexpr uint32_t kLinkTimeOffsetPlaceholderHigh = 0x12345; +constexpr uint32_t kLinkTimeOffsetPlaceholderLow = 0x678; + +// Compare-and-jump packed switch generates approx. 3 + 1.5 * N 32-bit +// instructions for N cases. +// Table-based packed switch generates approx. 10 32-bit instructions +// and N 32-bit data words for N cases. +// We switch to the table-based method starting with 6 entries. +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6; + +// FCLASS returns a 10-bit classification mask with the two highest bits marking NaNs +// (signaling and quiet). To detect a NaN, we can compare (either BGE or BGEU, the sign +// bit is always clear) the result with the `kFClassNaNMinValue`. +static_assert(kSignalingNaN == 0x100); +static_assert(kQuietNaN == 0x200); +static constexpr int32_t kFClassNaNMinValue = 0x100; + +static constexpr XRegister kCoreCalleeSaves[] = { + // S1(TR) is excluded as the ART thread register. + S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA +}; + +static constexpr FRegister kFpuCalleeSaves[] = { + FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11 +}; + +#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, x).Int32Value() + +Location RegisterOrZeroBitPatternLocation(HInstruction* instruction) { + return IsZeroBitPattern(instruction) + ? 
Location::ConstantLocation(instruction) + : Location::RequiresRegister(); +} + +XRegister InputXRegisterOrZero(Location location) { + if (location.IsConstant()) { + DCHECK(location.GetConstant()->IsZeroBitPattern()); + return Zero; + } else { + return location.AsRegister<XRegister>(); + } +} + +Location ValueLocationForStore(HInstruction* value) { + if (IsZeroBitPattern(value)) { + return Location::ConstantLocation(value); + } else if (DataType::IsFloatingPointType(value->GetType())) { + return Location::RequiresFpuRegister(); + } else { + return Location::RequiresRegister(); + } +} + +Location Riscv64ReturnLocation(DataType::Type return_type) { + switch (return_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kUint64: + case DataType::Type::kInt64: + return Location::RegisterLocation(A0); + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + return Location::FpuRegisterLocation(FA0); + + case DataType::Type::kVoid: + return Location::NoLocation(); + } + UNREACHABLE(); +} + +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + DCHECK_EQ( + calling_convention.GetRegisterAt(0), + calling_convention.GetReturnLocation(DataType::Type::kReference).AsRegister<XRegister>()); + return caller_saves; +} + +template <ClassStatus kStatus> +static constexpr int64_t ShiftedSignExtendedClassStatusValue() { + // This is used only for status values that have the highest bit set. + static_assert(CLZ(enum_cast<uint32_t>(kStatus)) == status_lsb_position); + constexpr uint32_t kShiftedStatusValue = enum_cast<uint32_t>(kStatus) << status_lsb_position; + static_assert(kShiftedStatusValue >= 0x80000000u); + return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32); +} + +int32_t ReadBarrierMarkEntrypointOffset(Location ref) { + DCHECK(ref.IsRegister()); + int reg = ref.reg(); + DCHECK(T0 <= reg && reg <= T6 && reg != TR) << reg; + // Note: Entrypoints for registers X30 (T5) and X31 (T6) are stored in entries + // for X0 (Zero) and X1 (RA) because these are not valid registers for marking + // and we currently have slots only up to register 29. + int entry_point_number = (reg >= 30) ? reg - 30 : reg; + return Thread::ReadBarrierMarkEntryPointsOffset<kRiscv64PointerSize>(entry_point_number); +} + +Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) { + return Riscv64ReturnLocation(return_type); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetReturnLocation(DataType::Type type) const { + return Riscv64ReturnLocation(type); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetMethodLocation() const { + return Location::RegisterLocation(kArtMethodRegister); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetNextLocation(DataType::Type type) { + Location next_location; + if (type == DataType::Type::kVoid) { + LOG(FATAL) << "Unexpected parameter type " << type; + } + + // Note: Unlike the RISC-V C/C++ calling convention, managed ABI does not use + // GPRs to pass FP args when we run out of FPRs. 
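// [Editor's example, not part of the patch] Concretely: for a managed method taking nine
// float parameters, the first eight land in FA0-FA7 and the ninth goes to a stack slot even
// though integer argument registers may still be free, whereas the RISC-V C calling
// convention would pass that ninth float in a GPR. The chain below therefore never assigns
// a floating-point argument to a core register.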
+ if (DataType::IsFloatingPointType(type) && + float_index_ < calling_convention.GetNumberOfFpuRegisters()) { + next_location = + Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(float_index_++)); + } else if (!DataType::IsFloatingPointType(type) && + (gp_index_ < calling_convention.GetNumberOfRegisters())) { + next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++)); + } else { + size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); + next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) : + Location::StackSlot(stack_offset); + } + + // Space on the stack is reserved for all arguments. + stack_index_ += DataType::Is64BitType(type) ? 2 : 1; + + return next_location; +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetNextLocation(DataType::Type type) { + DCHECK_NE(type, DataType::Type::kReference); + + Location location = Location::NoLocation(); + if (DataType::IsFloatingPointType(type)) { + if (fpr_index_ < kParameterFpuRegistersLength) { + location = Location::FpuRegisterLocation(kParameterFpuRegisters[fpr_index_]); + ++fpr_index_; + } + // Native ABI allows passing excessive FP args in GPRs. This is facilitated by + // inserting fake conversion intrinsic calls (`Double.doubleToRawLongBits()` + // or `Float.floatToRawIntBits()`) by `CriticalNativeAbiFixupRiscv64`. + // TODO(riscv64): Implement these intrinsics and `CriticalNativeAbiFixupRiscv64`. + } else { + // Native ABI uses the same core registers as a runtime call. + if (gpr_index_ < kRuntimeParameterCoreRegistersLength) { + location = Location::RegisterLocation(kRuntimeParameterCoreRegisters[gpr_index_]); + ++gpr_index_; + } + } + if (location.IsInvalid()) { + if (DataType::Is64BitType(type)) { + location = Location::DoubleStackSlot(stack_offset_); + } else { + location = Location::StackSlot(stack_offset_); + } + stack_offset_ += kFramePointerSize; + + if (for_register_allocation_) { + location = Location::Any(); + } + } + return location; +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetReturnLocation( + DataType::Type type) const { + // The result is returned the same way in native ABI and managed ABI. No result conversion is + // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`. + InvokeDexCallingConventionVisitorRISCV64 dex_calling_convention; + return dex_calling_convention.GetReturnLocation(type); +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetMethodLocation() const { + // Pass the method in the hidden argument T0. + return Location::RegisterLocation(T0); +} + +#define __ down_cast<CodeGeneratorRISCV64*>(codegen)->GetAssembler()-> // NOLINT + +void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) { + InvokeDexCallingConventionVisitorRISCV64 calling_convention_visitor; + CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor); +} + +class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + CompileOptimizedSlowPathRISCV64() : SlowPathCodeRISCV64(/*instruction=*/ nullptr) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + uint32_t entrypoint_offset = + GetThreadOffset<kRiscv64PointerSize>(kQuickCompileOptimized).Int32Value(); + __ Bind(GetEntryLabel()); + __ Loadd(RA, TR, entrypoint_offset); + // Note: we don't record the call here (and therefore don't generate a stack + // map), as the entrypoint should never be suspended. 
+ __ Jalr(RA); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "CompileOptimizedSlowPath"; } + + private: + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathRISCV64); +}; + +class SuspendCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + SuspendCheckSlowPathRISCV64(HSuspendCheck* instruction, HBasicBlock* successor) + : SlowPathCodeRISCV64(instruction), successor_(successor) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD. + riscv64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD. + if (successor_ == nullptr) { + __ J(GetReturnLabel()); + } else { + __ J(riscv64_codegen->GetLabelOf(successor_)); + } + } + + Riscv64Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + const char* GetDescription() const override { return "SuspendCheckSlowPathRISCV64"; } + + HBasicBlock* GetSuccessor() const { return successor_; } + + private: + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. + Riscv64Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathRISCV64); +}; + +class NullCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit NullCheckSlowPathRISCV64(HNullCheck* instr) : SlowPathCodeRISCV64(instr) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + riscv64_codegen->InvokeRuntime( + kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "NullCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathRISCV64); +}; + +class BoundsCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit BoundsCheckSlowPathRISCV64(HBoundsCheck* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. 
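// [Editor's note, not part of the patch] EmitParallelMoves is needed because the two inputs
// may already occupy each other's destinations. For example, if the index value currently
// lives in A1 and the array length in A0, copying them one at a time into A0/A1 would
// overwrite one of them; the parallel move resolver detects the cycle and routes one value
// through a temporary first.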
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kInt32, + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kInt32); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? + kQuickThrowStringBounds : + kQuickThrowArrayBounds; + riscv64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "BoundsCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathRISCV64); +}; + +class LoadClassSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + LoadClassSlowPathRISCV64(HLoadClass* cls, HInstruction* at) : SlowPathCodeRISCV64(at), cls_(cls) { + DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); + } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), riscv64_codegen->GetGraph()->GetDexFile()) || + riscv64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) || + ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(), + &cls_->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + if (cls_->NeedsAccessCheck()) { + CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>(); + riscv64_codegen->InvokeRuntime( + kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this); + } else { + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + riscv64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + } + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + riscv64_codegen->MoveLocation( + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source, cls_->GetType()); + } + if (must_do_clinit) { + riscv64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); + } + + // Move the class to the desired location. 
+ if (out.IsValid()) { + DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction_->GetType()); + riscv64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + } + RestoreLiveRegisters(codegen, locations); + + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadClassSlowPathRISCV64"; } + + private: + // The class this slow path will load. + HLoadClass* const cls_; + + DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathRISCV64); +}; + +class DeoptimizationSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit DeoptimizationSlowPathRISCV64(HDeoptimize* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); + riscv64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); + } + + const char* GetDescription() const override { return "DeoptimizationSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathRISCV64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierForRootSlowPathRISCV64(HInstruction* instruction, Location out, Location root) + : SlowPathCodeRISCV64(instruction), out_(out), root_(root) { + DCHECK(gUseReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + DataType::Type type = DataType::Type::kReference; + XRegister reg_out = out_.AsRegister<XRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || + instruction_->IsLoadString() || + (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + riscv64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + root_, + DataType::Type::kReference); + riscv64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + riscv64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathRISCV64"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathRISCV64); +}; + +class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit 
ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kReference, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kInt32, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + DataType::Type::kReference, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + riscv64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "ArraySetSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathRISCV64); +}; + +class TypeCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit TypeCheckSlowPathRISCV64(HInstruction* instruction, bool is_fatal) + : SlowPathCodeRISCV64(instruction), is_fatal_(is_fatal) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + + uint32_t dex_pc = instruction_->GetDexPc(); + DCHECK(instruction_->IsCheckCast() + || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + + __ Bind(GetEntryLabel()); + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { + SaveLiveRegisters(codegen, locations); + } + + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver.
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kReference, + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kReference); + if (instruction_->IsInstanceOf()) { + riscv64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); + DataType::Type ret_type = instruction_->GetType(); + Location ret_loc = calling_convention.GetReturnLocation(ret_type); + riscv64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + } else { + DCHECK(instruction_->IsCheckCast()); + riscv64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); + } + + if (!is_fatal_) { + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + } + + const char* GetDescription() const override { return "TypeCheckSlowPathRISCV64"; } + + bool IsFatal() const override { return is_fatal_; } + + private: + const bool is_fatal_; + + DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathRISCV64); +}; + +class DivZeroCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit DivZeroCheckSlowPathRISCV64(HDivZeroCheck* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + riscv64_codegen->InvokeRuntime( + kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "DivZeroCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathRISCV64); +}; + +class ReadBarrierMarkSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierMarkSlowPathRISCV64(HInstruction* instruction, Location ref, Location entrypoint) + : SlowPathCodeRISCV64(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(gUseReadBarrier); + DCHECK(entrypoint.IsRegister()); + } + + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathRISCV64"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + XRegister ref_reg = ref_.AsRegister<XRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsPredicatedInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + DCHECK(ref_reg >= T0 && ref_reg != TR); + + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + riscv64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_NE(entrypoint_.AsRegister<XRegister>(), TMP); // A taken branch can clobber `TMP`. + __ Jalr(entrypoint_.AsRegister<XRegister>()); // Clobbers `RA` (used as the `entrypoint_`). + __ J(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the already loaded entrypoint. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathRISCV64); +}; + +class LoadStringSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit LoadStringSlowPathRISCV64(HLoadString* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(instruction_->IsLoadString()); + DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + InvokeRuntimeCallingConvention calling_convention; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); + riscv64_codegen->InvokeRuntime( + kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction_->GetType()); + riscv64_codegen->MoveLocation( + locations->Out(), calling_convention.GetReturnLocation(type), type); + RestoreLiveRegisters(codegen, locations); + + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadStringSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathRISCV64); +}; + +#undef __ +#define __ down_cast<Riscv64Assembler*>(GetAssembler())-> // NOLINT + +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpBinOp( + Reg rd, FRegister rs1, FRegister rs2, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1, rs2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1, rs2); + } +} + +inline void InstructionCodeGeneratorRISCV64::FAdd( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FSub( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, 
&Riscv64Assembler::FSubS, &Riscv64Assembler::FSubD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FDiv( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMul( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMin( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMax( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FEq( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FLt( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FLtS, &Riscv64Assembler::FLtD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FLe( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type); +} + +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpUnOp( + Reg rd, FRegister rs1, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1); + } +} + +inline void InstructionCodeGeneratorRISCV64::FAbs( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FNeg( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMv( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FClass( + XRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type); +} + +void InstructionCodeGeneratorRISCV64::Load( + Location out, XRegister rs1, int32_t offset, DataType::Type type) { + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + __ Loadbu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt8: + __ Loadb(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kUint16: + __ Loadhu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt16: + __ Loadh(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt32: + __ Loadw(out.AsRegister<XRegister>(), rs1, offset); + break; + case 
DataType::Type::kInt64: + __ Loadd(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kReference: + __ Loadwu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kFloat32: + __ FLoadw(out.AsFpuRegister<FRegister>(), rs1, offset); + break; + case DataType::Type::kFloat64: + __ FLoadd(out.AsFpuRegister<FRegister>(), rs1, offset); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::Store( + Location value, XRegister rs1, int32_t offset, DataType::Type type) { + DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant())); + if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) { + riscv64::ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Mv(tmp, value.AsRegister<XRegister>()); + codegen_->PoisonHeapReference(tmp); + __ Storew(tmp, rs1, offset); + return; + } + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + __ Storeb(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + __ Storeh(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kFloat32: + if (!value.IsConstant()) { + __ FStorew(value.AsFpuRegister<FRegister>(), rs1, offset); + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kInt32: + case DataType::Type::kReference: + __ Storew(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kFloat64: + if (!value.IsConstant()) { + __ FStored(value.AsFpuRegister<FRegister>(), rs1, offset); + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kInt64: + __ Stored(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::ShNAdd( + XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type) { + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(DataType::SizeShift(type), 0u); + __ Add(rd, rs1, rs2); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(DataType::SizeShift(type), 1u); + __ Sh1Add(rd, rs1, rs2); + break; + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kFloat32: + DCHECK_EQ(DataType::SizeShift(type), 2u); + __ Sh2Add(rd, rs1, rs2); + break; + case DataType::Type::kInt64: + case DataType::Type::kFloat64: + DCHECK_EQ(DataType::SizeShift(type), 3u); + __ Sh3Add(rd, rs1, rs2); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverRISCV64::EmitMove(size_t index) { + MoveOperands* move = moves_[index]; + codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void ParallelMoveResolverRISCV64::EmitSwap(size_t index) { + MoveOperands* move = moves_[index]; + codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void 
ParallelMoveResolverRISCV64::SpillScratch([[maybe_unused]] int reg) { + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void ParallelMoveResolverRISCV64::RestoreScratch([[maybe_unused]] int reg) { + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void ParallelMoveResolverRISCV64::Exchange(int index1, int index2, bool double_slot) { + // We have 2 scratch X registers and 1 scratch F register that we can use. We prefer + // to use X registers for the swap but if both offsets are too big, we need to reserve + // one of the X registers for address adjustment and use an F register. + bool use_fp_tmp2 = false; + if (!IsInt<12>(index2)) { + if (!IsInt<12>(index1)) { + use_fp_tmp2 = true; + } else { + std::swap(index1, index2); + } + } + DCHECK_IMPLIES(!IsInt<12>(index2), use_fp_tmp2); + + Location loc1(double_slot ? Location::DoubleStackSlot(index1) : Location::StackSlot(index1)); + Location loc2(double_slot ? Location::DoubleStackSlot(index2) : Location::StackSlot(index2)); + riscv64::ScratchRegisterScope srs(GetAssembler()); + Location tmp = Location::RegisterLocation(srs.AllocateXRegister()); + DataType::Type tmp_type = double_slot ? DataType::Type::kInt64 : DataType::Type::kInt32; + Location tmp2 = use_fp_tmp2 + ? Location::FpuRegisterLocation(srs.AllocateFRegister()) + : Location::RegisterLocation(srs.AllocateXRegister()); + DataType::Type tmp2_type = use_fp_tmp2 + ? (double_slot ? DataType::Type::kFloat64 : DataType::Type::kFloat32) + : tmp_type; + + codegen_->MoveLocation(tmp, loc1, tmp_type); + codegen_->MoveLocation(tmp2, loc2, tmp2_type); + if (use_fp_tmp2) { + codegen_->MoveLocation(loc2, tmp, tmp_type); + } else { + // We cannot use `Stored()` or `Storew()` via `MoveLocation()` because we have + // no more scratch registers available. Use `Sd()` or `Sw()` explicitly. + DCHECK(IsInt<12>(index2)); + if (double_slot) { + __ Sd(tmp.AsRegister<XRegister>(), SP, index2); + } else { + __ Sw(tmp.AsRegister<XRegister>(), SP, index2); + } + srs.FreeXRegister(tmp.AsRegister<XRegister>()); // Free a temporary for `MoveLocation()`. + } + codegen_->MoveLocation(loc1, tmp2, tmp2_type); +} + +InstructionCodeGeneratorRISCV64::InstructionCodeGeneratorRISCV64(HGraph* graph, + CodeGeneratorRISCV64* codegen) + : InstructionCodeGenerator(graph, codegen), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +void InstructionCodeGeneratorRISCV64::GenerateClassInitializationCheck( + SlowPathCodeRISCV64* slow_path, XRegister class_reg) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + + // We shall load the full 32-bit status word with sign-extension and compare as unsigned + // to a sign-extended shifted status value. This yields the same comparison as loading and + // materializing unsigned but the constant is materialized with a single LUI instruction. + __ Loadw(tmp, class_reg, mirror::Class::StatusOffset().SizeValue()); // Sign-extended. 
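+ // The class status occupies the most significant bits of the status word, so any status at
+ // or above kVisiblyInitialized compares unsigned-greater-or-equal to the shifted constant.
+ // That shifted constant has its low 12 bits clear, which is why the LI below can expand to a
+ // single LUI.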
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>()); + __ Bltu(tmp, tmp2, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorRISCV64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* instruction, XRegister temp) { + UNUSED(instruction); + UNUSED(temp); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ J(codegen_->GetLabelOf(successor)); + } + return; + } + + if (codegen_->CanUseImplicitSuspendCheck()) { + LOG(FATAL) << "Unimplemented ImplicitSuspendCheck"; + return; + } + + SuspendCheckSlowPathRISCV64* slow_path = + down_cast<SuspendCheckSlowPathRISCV64*>(instruction->GetSlowPath()); + + if (slow_path == nullptr) { + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathRISCV64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + static_assert(Thread::SuspendOrCheckpointRequestFlags() != std::numeric_limits<uint32_t>::max()); + static_assert(IsPowerOfTwo(Thread::SuspendOrCheckpointRequestFlags() + 1u)); + // Shift out other bits. Use an instruction that can be 16-bit with the "C" Standard Extension. + __ Slli(tmp, tmp, CLZ(static_cast<uint64_t>(Thread::SuspendOrCheckpointRequestFlags()))); + if (successor == nullptr) { + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Beqz(tmp, codegen_->GetLabelOf(successor)); + __ J(slow_path->GetEntryLabel()); + // slow_path will return to GetLabelOf(successor). + } +} + +void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister( + HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + XRegister out_reg = out.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(gUseReadBarrier); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + maybe_temp, + /* needs_null_check= */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mv(maybe_temp.AsRegister<XRegister>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ Loadwu(out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. 
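+ // (With heap poisoning enabled the stored reference is poisoned, hence the unpoisoning
+ // right after the load below.)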
+ // /* HeapReference<Object> */ out = *(out + offset) + __ Loadwu(out_reg, out_reg, offset); + codegen_->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + XRegister out_reg = out.AsRegister<XRegister>(); + XRegister obj_reg = obj.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(gUseReadBarrier); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + maybe_temp, + /* needs_null_check= */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Loadwu(out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Loadwu(out_reg, obj_reg, offset); + codegen_->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low) { + DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset; + XRegister root_reg = root.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + DCHECK(gUseReadBarrier); + if (kUseBakerReadBarrier) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T6) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + // + // TODO(riscv64): Introduce a "marking register" that holds the pointer to one of the + // register marking entrypoints if marking (null if not marking) and make sure that + // marking entrypoints for other registers are at known offsets, so that we can call + // them using the "marking register" plus the offset embedded in the JALR instruction. + + if (label_low != nullptr) { + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Loadwu(root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway. 
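+ // `tmp` doubles as the slow path's `entrypoint_`: a non-null value loaded below both
+ // signals that `GetIsGcMarking()` is true and provides the address for the slow path's JALR.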
+ SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64( + instruction, root, Location::RegisterLocation(tmp)); + codegen_->AddSlowPath(slow_path); + + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Loadd(tmp, TR, entry_point_offset); + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + if (label_low != nullptr) { + __ Bind(label_low); + } + __ AddConst32(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + if (label_low != nullptr) { + __ Bind(label_low); + } + __ Loadwu(root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + +void InstructionCodeGeneratorRISCV64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + Riscv64Label* true_target, + Riscv64Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against "true" (integer value 1). + if (cond->AsIntConstant()->IsTrue()) { + if (true_target != nullptr) { + __ J(true_target); + } + } else { + DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); + if (false_target != nullptr) { + __ J(false_target); + } + } + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + // The condition instruction has been materialized, compare the output to 0. + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); + DCHECK(cond_val.IsRegister()); + if (true_target == nullptr) { + __ Beqz(cond_val.AsRegister<XRegister>(), false_target); + } else { + __ Bnez(cond_val.AsRegister<XRegister>(), true_target); + } + } else { + // The condition instruction has not been materialized, use its inputs as + // the comparison and its condition as the branch condition. 
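+ // For example, a non-materialized integer `a < b` with both operands in registers is
+ // emitted as a single BLT by GenerateIntLongCompareAndBranch() below.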
+ HCondition* condition = cond->AsCondition(); + DataType::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = condition->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Riscv64Label* branch_target = true_target; + + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + branch_target = false_target; + } + + switch (type) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateFpCondition(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; + default: + // Integral types and reference equality. + GenerateIntLongCompareAndBranch(if_cond, locations, branch_target); + break; + } + } + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { + __ J(false_target); + } +} + +void InstructionCodeGeneratorRISCV64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + XRegister out = locations->Out().AsRegister<XRegister>(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Mv(out, Zero); + } else { + if (imm == -1) { + if (type == DataType::Type::kInt32) { + __ Subw(out, Zero, dividend); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + __ Sub(out, Zero, dividend); + } + } else if (out != dividend) { + __ Mv(out, dividend); + } + } +} + +void InstructionCodeGeneratorRISCV64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + XRegister out = locations->Out().AsRegister<XRegister>(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + int64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); + DCHECK_GE(ctz_imm, 1); // Division by +/-1 is handled by `DivRemOneOrMinusOne()`. + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + // Calculate the negative dividend adjustment `tmp = dividend < 0 ? abs_imm - 1 : 0`. + // This adjustment is needed for rounding the division result towards zero. + if (type == DataType::Type::kInt32 || ctz_imm == 1) { + // A 32-bit dividend is sign-extended to 64-bit, so we can use the upper bits. + // And for a 64-bit division by +/-2, we need just the sign bit. + DCHECK_IMPLIES(type == DataType::Type::kInt32, ctz_imm < 32); + __ Srli(tmp, dividend, 64 - ctz_imm); + } else { + // For other 64-bit divisions, we need to replicate the sign bit. + __ Srai(tmp, dividend, 63); + __ Srli(tmp, tmp, 64 - ctz_imm); + } + // The rest of the calculation can use 64-bit operations even for 32-bit div/rem. 
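+ // Worked example: `imm == 8` (ctz_imm == 3), `dividend == -5`: tmp = 7, tmp + dividend = 2;
+ //   Div: 2 >> 3 (arithmetic) == 0 == -5 / 8 rounded towards zero;
+ //   Rem: -5 - (2 & -8) == -5 == -5 % 8.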
+ __ Add(tmp, tmp, dividend); + if (instruction->IsDiv()) { + __ Srai(out, tmp, ctz_imm); + if (imm < 0) { + __ Neg(out, out); + } + } else { + if (ctz_imm <= 11) { + __ Andi(tmp, tmp, -abs_imm); + } else { + ScratchRegisterScope srs2(GetAssembler()); + XRegister tmp2 = srs2.AllocateXRegister(); + __ Li(tmp2, -abs_imm); + __ And(tmp, tmp, tmp2); + } + __ Sub(out, dividend, tmp); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + LocationSummary* locations = instruction->GetLocations(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + Location second = locations->InAt(1); + int64_t imm = Int64FromConstant(second.GetConstant()); + DataType::Type type = instruction->GetResultType(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + + // TODO: optimize with constant. + __ LoadConst64(tmp, imm); + if (instruction->IsDiv()) { + if (type == DataType::Type::kInt32) { + __ Divw(out, dividend, tmp); + } else { + __ Div(out, dividend, tmp); + } + } else { + if (type == DataType::Type::kInt32) { + __ Remw(out, dividend, tmp); + } else { + __ Rem(out, dividend, tmp); + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; + + LocationSummary* locations = instruction->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister divisor = second.AsRegister<XRegister>(); + if (instruction->IsDiv()) { + if (type == DataType::Type::kInt32) { + __ Divw(out, dividend, divisor); + } else { + __ Div(out, dividend, divisor); + } + } else { + if (type == DataType::Type::kInt32) { + __ Remw(out, dividend, divisor); + } else { + __ Rem(out, dividend, divisor); + } + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond, + LocationSummary* locations) { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + bool use_imm = rs2_location.IsConstant(); + int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0; + XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>(); + switch (cond) { + case kCondEQ: + case kCondNE: + if (!use_imm) { + __ Sub(rd, rs1, rs2); // SUB is OK here even for 32-bit comparison. + } else if (imm != 0) { + DCHECK(IsInt<12>(-imm)); + __ Addi(rd, rs1, -imm); // ADDI is OK here even for 32-bit comparison. + } // else test `rs1` directly without subtraction for `use_imm && imm == 0`. + if (cond == kCondEQ) { + __ Seqz(rd, (use_imm && imm == 0) ? 
rs1 : rd); + } else { + __ Snez(rd, (use_imm && imm == 0) ? rs1 : rd); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm) { + DCHECK(IsInt<12>(imm)); + __ Slti(rd, rs1, imm); + } else { + __ Slt(rd, rs1, rs2); + } + if (cond == kCondGE) { + // Calculate `rs1 >= rhs` as `!(rs1 < rhs)` since there's only the SLT but no SGE. + __ Xori(rd, rd, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm) { + // Calculate `rs1 <= imm` as `rs1 < imm + 1`. + DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check. + __ Slti(rd, rs1, imm + 1); + } else { + __ Slt(rd, rs2, rs1); + } + if ((cond == kCondGT) == use_imm) { + // Calculate `rs1 > imm` as `!(rs1 < imm + 1)` and calculate + // `rs1 <= rs2` as `!(rs2 < rs1)` since there's only the SLT but no SGE. + __ Xori(rd, rd, 1); + } + break; + + case kCondB: + case kCondAE: + if (use_imm) { + // Sltiu sign-extends its 12-bit immediate operand before the comparison + // and thus lets us compare directly with unsigned values in the ranges + // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff]. + DCHECK(IsInt<12>(imm)); + __ Sltiu(rd, rs1, imm); + } else { + __ Sltu(rd, rs1, rs2); + } + if (cond == kCondAE) { + // Calculate `rs1 AE rhs` as `!(rs1 B rhs)` since there's only the SLTU but no SGEU. + __ Xori(rd, rd, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm) { + // Calculate `rs1 BE imm` as `rs1 B imm + 1`. + // Sltiu sign-extends its 12-bit immediate operand before the comparison + // and thus lets us compare directly with unsigned values in the ranges + // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff]. + DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check. + __ Sltiu(rd, rs1, imm + 1); + } else { + __ Sltu(rd, rs2, rs1); + } + if ((cond == kCondA) == use_imm) { + // Calculate `rs1 A imm` as `!(rs1 B imm + 1)` and calculate + // `rs1 BE rs2` as `!(rs2 B rs1)` since there's only the SLTU but no SGEU. 
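+ // (The `(cond == kCondA) == use_imm` check above selects exactly these two cases:
+ // `A` with an immediate and `BE` with a register operand.)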
+ __ Xori(rd, rd, 1); + } + break; + } +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + Riscv64Label* label) { + XRegister left = locations->InAt(0).AsRegister<XRegister>(); + Location right_location = locations->InAt(1); + if (right_location.IsConstant()) { + DCHECK_EQ(CodeGenerator::GetInt64ValueOf(right_location.GetConstant()), 0); + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqz(left, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnez(left, label); + break; + case kCondLT: + __ Bltz(left, label); + break; + case kCondGE: + __ Bgez(left, label); + break; + case kCondLE: + __ Blez(left, label); + break; + case kCondGT: + __ Bgtz(left, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ J(label); + break; + } + } else { + XRegister right_reg = right_location.AsRegister<XRegister>(); + switch (cond) { + case kCondEQ: + __ Beq(left, right_reg, label); + break; + case kCondNE: + __ Bne(left, right_reg, label); + break; + case kCondLT: + __ Blt(left, right_reg, label); + break; + case kCondGE: + __ Bge(left, right_reg, label); + break; + case kCondLE: + __ Ble(left, right_reg, label); + break; + case kCondGT: + __ Bgt(left, right_reg, label); + break; + case kCondB: + __ Bltu(left, right_reg, label); + break; + case kCondAE: + __ Bgeu(left, right_reg, label); + break; + case kCondBE: + __ Bleu(left, right_reg, label); + break; + case kCondA: + __ Bgtu(left, right_reg, label); + break; + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label) { + // RISCV-V FP compare instructions yield the following values: + // l<r l=r l>r Unordered + // FEQ l,r 0 1 0 0 + // FLT l,r 1 0 0 0 + // FLT r,l 0 0 1 0 + // FLE l,r 1 1 0 0 + // FLE r,l 0 1 1 0 + // + // We can calculate the `Compare` results using the following formulas: + // l<r l=r l>r Unordered + // Compare/gt_bias -1 0 1 1 = ((FLE l,r) ^ 1) - (FLT l,r) + // Compare/lt_bias -1 0 1 -1 = ((FLE r,l) - 1) + (FLT r,l) + // These are emitted in `VisitCompare()`. + // + // This function emits a fused `Condition(Compare(., .), 0)`. If we compare the + // `Compare` results above with 0, we get the following values and formulas: + // l<r l=r l>r Unordered + // CondEQ/- 0 1 0 0 = (FEQ l, r) + // CondNE/- 1 0 1 1 = (FEQ l, r) ^ 1 + // CondLT/gt_bias 1 0 0 0 = (FLT l,r) + // CondLT/lt_bias 1 0 0 1 = (FLE r,l) ^ 1 + // CondLE/gt_bias 1 1 0 0 = (FLE l,r) + // CondLE/lt_bias 1 1 0 1 = (FLT r,l) ^ 1 + // CondGT/gt_bias 0 0 1 1 = (FLE l,r) ^ 1 + // CondGT/lt_bias 0 0 1 0 = (FLT r,l) + // CondGE/gt_bias 0 1 1 1 = (FLT l,r) ^ 1 + // CondGE/lt_bias 0 1 1 0 = (FLE r,l) + // (CondEQ/CondNE comparison with zero yields the same result with gt_bias and lt_bias.) + // + // If the condition is not materialized, the `^ 1` is not emitted, + // instead the condition is reversed by emitting BEQZ instead of BNEZ. + + FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>(); + + DCHECK_EQ(label != nullptr, locations->Out().IsInvalid()); + ScratchRegisterScope srs(GetAssembler()); + XRegister rd = + (label != nullptr) ? 
srs.AllocateXRegister() : locations->Out().AsRegister<XRegister>(); + bool reverse_condition = false; + + switch (cond) { + case kCondEQ: + FEq(rd, rs1, rs2, type); + break; + case kCondNE: + FEq(rd, rs1, rs2, type); + reverse_condition = true; + break; + case kCondLT: + if (gt_bias) { + FLt(rd, rs1, rs2, type); + } else { + FLe(rd, rs2, rs1, type); + reverse_condition = true; + } + break; + case kCondLE: + if (gt_bias) { + FLe(rd, rs1, rs2, type); + } else { + FLt(rd, rs2, rs1, type); + reverse_condition = true; + } + break; + case kCondGT: + if (gt_bias) { + FLe(rd, rs1, rs2, type); + reverse_condition = true; + } else { + FLt(rd, rs2, rs1, type); + } + break; + case kCondGE: + if (gt_bias) { + FLt(rd, rs1, rs2, type); + reverse_condition = true; + } else { + FLe(rd, rs2, rs1, type); + } + break; + default: + LOG(FATAL) << "Unexpected floating-point condition " << cond; + UNREACHABLE(); + } + + if (label != nullptr) { + if (reverse_condition) { + __ Beqz(rd, label); + } else { + __ Bnez(rd, label); + } + } else { + if (reverse_condition) { + __ Xori(rd, rd, 1); + } + } +} + +void CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, /*index=*/ Location::NoLocation(), temp, needs_null_check); +} + +void CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + // For now, use the same approach as for GC roots plus unpoison the reference if needed. + // TODO(riscv64): Implement checking if the holder is black. + UNUSED(temp); + + XRegister reg = ref.AsRegister<XRegister>(); + if (index.IsValid()) { + DCHECK(instruction->IsArrayGet()); + DCHECK(!needs_null_check); + DCHECK(index.IsRegister()); + // /* HeapReference<Object> */ ref = *(obj + index * element_size + offset) + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction->GetType()); + instruction_visitor_.ShNAdd(reg, index.AsRegister<XRegister>(), obj, type); + __ Loadwu(reg, reg, offset); + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ Loadwu(reg, obj, offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + } + MaybeUnpoisonHeapReference(reg); + + // Slow path marking the reference. + XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway. + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64( + instruction, ref, Location::RegisterLocation(tmp)); + AddSlowPath(slow_path); + + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(ref); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
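+ // A null entrypoint means GC marking is not active: the BNEZ below falls through and the
+ // reference is used as loaded; otherwise the slow path calls the mark entrypoint.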
+ __ Loadd(tmp, TR, entry_point_offset); + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + UNUSED(instruction); + UNUSED(out); + UNUSED(ref); + UNUSED(obj); + UNUSED(offset); + UNUSED(index); + LOG(FATAL) << "Unimplemented"; +} + +void CodeGeneratorRISCV64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (gUseReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + UnpoisonHeapReference(out.AsRegister<XRegister>()); + } +} + +void CodeGeneratorRISCV64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { + DCHECK(gUseReadBarrier); + + // Insert a slow path based read barrier *after* the GC root load. + // + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeRISCV64* slow_path = + new (GetScopedAllocator()) ReadBarrierForRootSlowPathRISCV64(instruction, out, root); + AddSlowPath(slow_path); + + __ J(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction, + HBasicBlock* successor) { + if (successor->IsExitBlock()) { + DCHECK(instruction->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + + HBasicBlock* block = instruction->GetBlock(); + HInstruction* previous = instruction->GetPrevious(); + HLoopInformation* info = block->GetLoopInformation(); + + if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + codegen_->MaybeIncrementHotness(/*is_frame_entry=*/ false); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; // `GenerateSuspendCheck()` emitted the jump. + } + if (block->IsEntryBlock() && previous != nullptr && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(block, successor)) { + __ J(codegen_->GetLabelOf(successor)); + } +} + +void InstructionCodeGeneratorRISCV64::GenPackedSwitchWithCompares(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block) { + // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0, + // `adjusted` is the original `value` register and we must not clobber it. Otherwise, + // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check. + + // Create a set of compare/jumps. + ArrayRef<HBasicBlock* const> successors(switch_block->GetSuccessors()); + uint32_t index = 0; + for (; num_entries - index >= 2u; index += 2u) { + // Jump to `successors[index]` if `value == lower_bound + index`. + // Note that `adjusted` holds `value - lower_bound - index`. + __ Beqz(adjusted, codegen_->GetLabelOf(successors[index])); + if (num_entries - index == 2u) { + break; // The last entry shall match, so the branch shall be unconditional. 
+ } + // Jump to `successors[index + 1]` if `value == lower_bound + index + 1`. + // Modify `adjusted` to hold `value - lower_bound - index - 2` for this comparison. + __ Addi(temp, adjusted, -2); + adjusted = temp; + __ Bltz(adjusted, codegen_->GetLabelOf(successors[index + 1])); + } + // For the last entry, unconditionally jump to `successors[num_entries - 1]`. + __ J(codegen_->GetLabelOf(successors[num_entries - 1u])); +} + +void InstructionCodeGeneratorRISCV64::GenTableBasedPackedSwitch(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block) { + // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0, + // `adjusted` is the original `value` register and we must not clobber it. Otherwise, + // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check. + + // Create a jump table. + ArenaVector<Riscv64Label*> labels(num_entries, + __ GetAllocator()->Adapter(kArenaAllocSwitchTable)); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels)); + + // Load the address of the jump table. + // Note: The `LoadLabelAddress()` emits AUIPC+ADD. It is possible to avoid the ADD and + // instead embed that offset in the LW below as well as all jump table entries but + // that would need some invasive changes in the jump table handling in the assembler. + ScratchRegisterScope srs(GetAssembler()); + XRegister table_base = srs.AllocateXRegister(); + __ LoadLabelAddress(table_base, table->GetLabel()); + + // Load the PC difference from the jump table. + // TODO(riscv64): Use SH2ADD from the Zba extension. + __ Slli(temp, adjusted, 2); + __ Add(temp, temp, table_base); + __ Lw(temp, temp, 0); + + // Compute the absolute target address by adding the table start address + // (the table contains offsets to targets relative to its start). + __ Add(temp, temp, table_base); + // And jump. + __ Jr(temp); +} + +int32_t InstructionCodeGeneratorRISCV64::VecAddress(LocationSummary* locations, + size_t size, + /*out*/ XRegister* adjusted_base) { + UNUSED(locations); + UNUSED(size); + UNUSED(adjusted_base); + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void InstructionCodeGeneratorRISCV64::GenConditionalMove(HSelect* select) { + UNUSED(select); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) { + DCHECK_EQ(instruction->InputCount(), 2u); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction* right = instruction->InputAt(1); + bool can_use_imm = false; + if (instruction->IsMin() || instruction->IsMax()) { + can_use_imm = IsZeroBitPattern(instruction); + } else if (right->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant()); + can_use_imm = IsInt<12>(instruction->IsSub() ? 
-imm : imm); + } + if (can_use_imm) { + locations->SetInAt(1, Location::ConstantLocation(right)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + if (instruction->IsMin() || instruction->IsMax()) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + } else { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } + break; + + default: + LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instruction) { + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + + bool use_imm = rs2_location.IsConstant(); + XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>(); + int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0; + + if (instruction->IsAnd()) { + if (use_imm) { + __ Andi(rd, rs1, imm); + } else { + __ And(rd, rs1, rs2); + } + } else if (instruction->IsOr()) { + if (use_imm) { + __ Ori(rd, rs1, imm); + } else { + __ Or(rd, rs1, rs2); + } + } else if (instruction->IsXor()) { + if (use_imm) { + __ Xori(rd, rs1, imm); + } else { + __ Xor(rd, rs1, rs2); + } + } else if (instruction->IsAdd() || instruction->IsSub()) { + if (type == DataType::Type::kInt32) { + if (use_imm) { + __ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm); + } else if (instruction->IsAdd()) { + __ Addw(rd, rs1, rs2); + } else { + DCHECK(instruction->IsSub()); + __ Subw(rd, rs1, rs2); + } + } else { + if (use_imm) { + __ Addi(rd, rs1, instruction->IsSub() ? -imm : imm); + } else if (instruction->IsAdd()) { + __ Add(rd, rs1, rs2); + } else { + DCHECK(instruction->IsSub()); + __ Sub(rd, rs1, rs2); + } + } + } else if (instruction->IsMin()) { + DCHECK_IMPLIES(use_imm, imm == 0); + __ Min(rd, rs1, use_imm ? Zero : rs2); + } else { + DCHECK(instruction->IsMax()); + DCHECK_IMPLIES(use_imm, imm == 0); + __ Max(rd, rs1, use_imm ? Zero : rs2); + } + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister rd = locations->Out().AsFpuRegister<FRegister>(); + FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>(); + if (instruction->IsAdd()) { + FAdd(rd, rs1, rs2, type); + } else if (instruction->IsSub()) { + FSub(rd, rs1, rs2, type); + } else { + DCHECK(instruction->IsMin() || instruction->IsMax()); + // If one of the operands is NaN and the other is not, riscv64 instructions FMIN/FMAX + // return the other operand while we want to return the NaN operand. + DCHECK_NE(rd, rs1); // Requested `Location::kOutputOverlap`. + DCHECK_NE(rd, rs2); // Requested `Location::kOutputOverlap`. + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + Riscv64Label done; + // Return `rs1` if it's NaN. 
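+ // FCLASS sets a single class bit; the two NaN classes occupy the highest bit positions,
+ // so the unsigned `Bgeu` against kFClassNaNMinValue below detects any NaN.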
+ FClass(tmp, rs1, type); + __ Li(tmp2, kFClassNaNMinValue); + FMv(rd, rs1, type); + __ Bgeu(tmp, tmp2, &done); + // Return `rs2` if it's NaN. + FClass(tmp, rs2, type); + FMv(rd, rs2, type); + __ Bgeu(tmp, tmp2, &done); + // Calculate Min/Max for non-NaN arguments. + if (instruction->IsMin()) { + FMin(rd, rs1, rs2, type); + } else { + FMax(rd, rs1, rs2, type); + } + __ Bind(&done); + } + break; + } + default: + LOG(FATAL) << "Unexpected binary operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::HandleCondition(HCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + + default: { + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction* rhs = instruction->InputAt(1); + bool use_imm = false; + if (rhs->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rhs->AsConstant()); + if (instruction->IsEmittedAtUseSite()) { + // For `HIf`, materialize all non-zero constants with an `HParallelMove`. + // Note: For certain constants and conditions, the code could be improved. + // For example, 2048 takes two instructions to materialize but the negative + // -2048 could be embedded in ADDI for EQ/NE comparison. + use_imm = (imm == 0); + } else { + // Constants that cannot be embedded in an instruction's 12-bit immediate shall be + // materialized with an `HParallelMove`. This simplifies the code and avoids cases + // with arithmetic overflow. Adjust the `imm` if needed for a particular instruction. + switch (instruction->GetCondition()) { + case kCondEQ: + case kCondNE: + imm = -imm; // ADDI with negative immediate (there is no SUBI). + break; + case kCondLE: + case kCondGT: + case kCondBE: + case kCondA: + imm += 1; // SLTI/SLTIU with adjusted immediate (there is no SLEI/SLEIU). + break; + default: + break; + } + use_imm = IsInt<12>(imm); + } + } + if (use_imm) { + locations->SetInAt(1, Location::ConstantLocation(rhs)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + break; + } + } + if (!instruction->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorRISCV64::HandleCondition(HCondition* instruction) { + if (instruction->IsEmittedAtUseSite()) { + return; + } + + DataType::Type type = instruction->InputAt(0)->GetType(); + LocationSummary* locations = instruction->GetLocations(); + switch (type) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateFpCondition(instruction->GetCondition(), instruction->IsGtBias(), type, locations); + return; + default: + // Integral types and reference equality. 
+ GenerateIntLongCondition(instruction->GetCondition(), locations); + return; + } +} + +void LocationsBuilderRISCV64::HandleShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || + instruction->IsShr() || + instruction->IsUShr() || + instruction->IsRor()); + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected shift type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::HandleShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || + instruction->IsShr() || + instruction->IsUShr() || + instruction->IsRor()); + LocationSummary* locations = instruction->GetLocations(); + DataType::Type type = instruction->GetType(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + + if (rs2_location.IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()); + uint32_t shamt = + imm & (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); + + if (shamt == 0) { + if (rd != rs1) { + __ Mv(rd, rs1); + } + } else if (type == DataType::Type::kInt32) { + if (instruction->IsShl()) { + __ Slliw(rd, rs1, shamt); + } else if (instruction->IsShr()) { + __ Sraiw(rd, rs1, shamt); + } else if (instruction->IsUShr()) { + __ Srliw(rd, rs1, shamt); + } else { + DCHECK(instruction->IsRor()); + __ Roriw(rd, rs1, shamt); + } + } else { + if (instruction->IsShl()) { + __ Slli(rd, rs1, shamt); + } else if (instruction->IsShr()) { + __ Srai(rd, rs1, shamt); + } else if (instruction->IsUShr()) { + __ Srli(rd, rs1, shamt); + } else { + DCHECK(instruction->IsRor()); + __ Rori(rd, rs1, shamt); + } + } + } else { + XRegister rs2 = rs2_location.AsRegister<XRegister>(); + if (type == DataType::Type::kInt32) { + if (instruction->IsShl()) { + __ Sllw(rd, rs1, rs2); + } else if (instruction->IsShr()) { + __ Sraw(rd, rs1, rs2); + } else if (instruction->IsUShr()) { + __ Srlw(rd, rs1, rs2); + } else { + DCHECK(instruction->IsRor()); + __ Rorw(rd, rs1, rs2); + } + } else { + if (instruction->IsShl()) { + __ Sll(rd, rs1, rs2); + } else if (instruction->IsShr()) { + __ Sra(rd, rs1, rs2); + } else if (instruction->IsUShr()) { + __ Srl(rd, rs1, rs2); + } else { + DCHECK(instruction->IsRor()); + __ Ror(rd, rs1, rs2); + } + } + } + break; + } + default: + LOG(FATAL) << "Unexpected shift operation type " << type; + } +} + +void CodeGeneratorRISCV64::MarkGCCard(XRegister object, + XRegister value, + bool value_can_be_null) { + Riscv64Label done; + ScratchRegisterScope srs(GetAssembler()); + XRegister card = srs.AllocateXRegister(); + XRegister temp = srs.AllocateXRegister(); + if (value_can_be_null) { + __ Beqz(value, &done); + } + // Load the address of the card table into `card`. + __ Loadd(card, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value()); + + // Calculate the address of the card corresponding to `object`. 
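+ // card_address = (biased) card_table_base + (object >> kCardShift), i.e. one card byte per
+ // 2^kCardShift bytes of heap.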
+ __ Srli(temp, object, gc::accounting::CardTable::kCardShift); + __ Add(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). + __ Storeb(card, temp, 0); + if (value_can_be_null) { + __ Bind(&done); + } +} + +void LocationsBuilderRISCV64::HandleFieldSet(HInstruction* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, ValueLocationForStore(instruction->InputAt(1))); +} + +void InstructionCodeGeneratorRISCV64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { + DataType::Type type = field_info.GetFieldType(); + LocationSummary* locations = instruction->GetLocations(); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + Location value = locations->InAt(1); + DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant())); + bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + bool is_predicated = + instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); + + Riscv64Label pred_is_null; + if (is_predicated) { + __ Beqz(obj, &pred_is_null); + } + + if (is_volatile) { + if (DataType::Size(type) >= 4u) { + // Use AMOSWAP for 32-bit and 64-bit data types. + ScratchRegisterScope srs(GetAssembler()); + XRegister swap_src = kNoXRegister; + if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ Mv(swap_src, value.AsRegister<XRegister>()); + codegen_->PoisonHeapReference(swap_src); + } else if (type == DataType::Type::kFloat64 && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ FMvXD(swap_src, value.AsFpuRegister<FRegister>()); + } else if (type == DataType::Type::kFloat32 && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ FMvXW(swap_src, value.AsFpuRegister<FRegister>()); + } else { + swap_src = InputXRegisterOrZero(value); + } + XRegister addr = srs.AllocateXRegister(); + __ AddConst64(addr, obj, offset); + if (DataType::Is64BitType(type)) { + __ AmoSwapD(Zero, swap_src, addr, AqRl::kRelease); + } else { + __ AmoSwapW(Zero, swap_src, addr, AqRl::kRelease); + } + } else { + // Use fences for smaller data types. 
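+ // An any-store barrier before the plain store plus an any-any barrier after it give the
+ // volatile store its ordering, mirroring the release AMOSWAP used for wider types above.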
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + Store(value, obj, offset, type); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + Store(value, obj, offset, type); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { + codegen_->MarkGCCard( + obj, + value.AsRegister<XRegister>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); + } + + if (is_predicated) { + __ Bind(&pred_is_null); + } +} + +void LocationsBuilderRISCV64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsPredicatedInstanceFieldGet()); + + bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + + bool object_field_get_with_read_barrier = + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, + object_field_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); + + // Input for object receiver. + locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister()); + + if (DataType::IsFloatingPointType(instruction->GetType())) { + if (is_predicated) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + } else { + locations->SetOut(Location::RequiresFpuRegister()); + } + } else { + if (is_predicated) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + } else { + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + } + + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsPredicatedInstanceFieldGet()); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location dst_loc = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Object> */ dst = *(obj + offset) + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier call. 
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + dst_loc, + obj, + offset, + temp_loc, + /* needs_null_check= */ true); + } else { + Load(dst_loc, obj, offset, type); + } + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + + if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) { + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); + } +} + +void LocationsBuilderRISCV64::VisitAbove(HAbove* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAbove(HAbove* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Sraiw(tmp, in, 31); + __ Xor(out, in, tmp); + __ Subw(out, out, tmp); + break; + } + case DataType::Type::kInt64: { + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Srai(tmp, in, 63); + __ Xor(out, in, tmp); + __ Sub(out, out, tmp); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FAbs(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + abs->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void LocationsBuilderRISCV64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitArrayGet(HArrayGet* instruction) { + DataType::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = gUseReadBarrier && (type == DataType::Type::kReference); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + 
LocationSummary(instruction, + object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (DataType::IsFloatingPointType(type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); + } + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location out_loc = locations->Out(); + Location index = locations->InAt(1); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + DataType::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); + + Riscv64Label string_char_at_done; + if (maybe_compressed_char_at) { + DCHECK_EQ(type, DataType::Type::kUint16); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Riscv64Label uncompressed_load; + { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, obj, count_offset); + __ Andi(tmp, tmp, 0x1); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ Bnez(tmp, &uncompressed_load); + } + XRegister out = out_loc.AsRegister<XRegister>(); + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + __ Loadbu(out, obj, data_offset + const_index); + } else { + __ Add(out, obj, index.AsRegister<XRegister>()); + __ Loadbu(out, out, data_offset); + } + __ J(&string_char_at_done); + __ Bind(&uncompressed_load); + } + + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + // Note that a potential implicit null check could be handled in these + // `CodeGeneratorRISCV64::Generate{Array,Field}LoadWithBakerReadBarrier()` calls + // but we currently do not support implicit null checks on `HArrayGet`. + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + Location temp = locations->GetTemp(0); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. 
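+ // For example, a reference element at constant index 3 yields
+ // offset = (3 << 2) + data_offset, since heap references are 4 bytes wide
+ // (see the static_assert above).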
+ static constexpr size_t shift = DataType::SizeShift(DataType::Type::kReference); + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << shift) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check= */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check= */ false); + } + } else if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + int32_t offset = data_offset + (const_index << DataType::SizeShift(type)); + Load(out_loc, obj, offset, type); + if (type == DataType::Type::kReference) { + DCHECK(!(gUseReadBarrier && kUseBakerReadBarrier)); + // If read barriers are enabled, emit read barriers other than Baker's using + // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } + } else { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + ShNAdd(tmp, index.AsRegister<XRegister>(), obj, type); + Load(out_loc, tmp, data_offset, type); + if (type == DataType::Type::kReference) { + DCHECK(!(gUseReadBarrier && kUseBakerReadBarrier)); + // If read barriers are enabled, emit read barriers other than Baker's using + // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + + if (maybe_compressed_char_at) { + __ Bind(&string_char_at_done); + } +} + +void LocationsBuilderRISCV64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ Loadwu(out, obj, offset); // Unsigned for string length; does not matter for other arrays. + codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ Srli(out, out, 1u); + } +} + +void LocationsBuilderRISCV64::VisitArraySet(HArraySet* instruction) { + bool needs_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, + needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, ValueLocationForStore(instruction->GetValue())); +} + +void InstructionCodeGeneratorRISCV64::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister array = locations->InAt(0).AsRegister<XRegister>(); + Location index = locations->InAt(1); + Location value = locations->InAt(2); + DataType::Type value_type = instruction->GetComponentType(); + bool needs_type_check = instruction->NeedsTypeCheck(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + size_t data_offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); + SlowPathCodeRISCV64* slow_path = nullptr; + + if (needs_write_barrier) { + DCHECK_EQ(value_type, DataType::Type::kReference); + DCHECK(!value.IsConstant()); + Riscv64Label do_store; + + bool can_value_be_null = instruction->GetValueCanBeNull(); + if (can_value_be_null) { + __ Beqz(value.AsRegister<XRegister>(), &do_store); + } + + if (needs_type_check) { + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + + ScratchRegisterScope srs(GetAssembler()); + XRegister temp1 = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + // Note that when read barriers are enabled, the type checks are performed + // without read barriers. This is fine, even in the case where a class object + // is in the from-space after the flip, as a comparison involving such a type + // would not produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow path. + + // /* HeapReference<Class> */ temp1 = array->klass_ + __ Loadwu(temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + codegen_->MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ Loadwu(temp2, temp1, component_offset); + // /* HeapReference<Class> */ temp1 = value->klass_ + __ Loadwu(temp1, value.AsRegister<XRegister>(), class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Riscv64Label do_put; + __ Beq(temp1, temp2, &do_put); + // If heap poisoning is enabled, the `temp2` reference has + // not been unpoisoned yet; unpoison it now. + codegen_->MaybeUnpoisonHeapReference(temp2); + + // /* HeapReference<Class> */ temp1 = temp2->super_class_ + __ Loadwu(temp1, temp2, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
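+ // A non-null super class means the component type is not java.lang.Object, so the
+ // assignability of `value` is not statically known and the slow path must check it;
+ // a null super class means this is effectively an Object[] and any reference may be stored.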
+ __ Bnez(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ Bne(temp1, temp2, slow_path->GetEntryLabel()); + } + } + + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(array, value.AsRegister<XRegister>(), /* emit_null_check= */ false); + } + + if (can_value_be_null) { + __ Bind(&do_store); + } + } + + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + int32_t offset = data_offset + (const_index << DataType::SizeShift(value_type)); + Store(value, array, offset, value_type); + } else { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + ShNAdd(tmp, index.AsRegister<XRegister>(), array, value_type); + Store(value, tmp, data_offset, value_type); + } + // There must be no instructions between the `Store()` and the `MaybeRecordImplicitNullCheck()`. + // We can avoid this if the type check makes the null check unconditionally. + DCHECK_IMPLIES(needs_type_check, needs_write_barrier); + if (!(needs_type_check && !instruction->GetValueCanBeNull())) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void LocationsBuilderRISCV64::VisitBelow(HBelow* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitBelow(HBelow* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + __ Xori(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>(), 1); +} + +void LocationsBuilderRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) { + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + + bool const_index = false; + bool const_length = false; + + if (length->IsConstant()) { + if (index->IsConstant()) { + const_index = true; + const_length = true; + } else { + int32_t length_value = length->AsIntConstant()->GetValue(); + if (length_value == 0 || length_value == 1) { + const_length = true; + } + } + } else if (index->IsConstant()) { + int32_t index_value = index->AsIntConstant()->GetValue(); + if (index_value <= 0) { + const_index = true; + } + } + + locations->SetInAt( + 0, + const_index ? 
Location::ConstantLocation(index) : Location::RequiresRegister()); + locations->SetInAt( + 1, + const_length ? Location::ConstantLocation(length) : Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0 || index >= length) { + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + __ J(slow_path->GetEntryLabel()); + } else { + // Nothing to be done. + } + return; + } + + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + XRegister index = index_loc.AsRegister<XRegister>(); + if (length == 0) { + __ J(slow_path->GetEntryLabel()); + } else { + DCHECK_EQ(length, 1); + __ Bnez(index, slow_path->GetEntryLabel()); + } + } else { + XRegister length = length_loc.AsRegister<XRegister>(); + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0) { + __ J(slow_path->GetEntryLabel()); + } else { + DCHECK_EQ(index, 0); + __ Blez(length, slow_path->GetEntryLabel()); + } + } else { + XRegister index = index_loc.AsRegister<XRegister>(); + __ Bgeu(index, length, slow_path->GetEntryLabel()); + } + } +} + +void LocationsBuilderRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (gUseReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; +} + +// Interface case has 3 temps, one for holding the number of interfaces, one for the current +// interface pointer, one for loading the current interface. +// The other checks have one temp for loading the object's class and maybe a temp for read barrier. 
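+// For example, an exact check without read barriers needs only the single temp holding the
+// object's class, while an interface check always reserves all three temps.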
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ return 3;
+ }
+
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
+void LocationsBuilderRISCV64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Location::NoLocation()
+ : locations->InAt(1);
+ Location temp_loc = locations->GetTemp(0);
+ XRegister temp = temp_loc.AsRegister<XRegister>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 3u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ Riscv64Label done;
+
+ bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(slow_path);
+
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Jump to slow path for throwing the exception or doing a
+ // more involved array check.
+ __ Bne(temp, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class is abstract, we eagerly fetch the super class of the + // object to avoid doing a comparison we know will fail. + Riscv64Label loop; + __ Bind(&loop); + // /* HeapReference<Class> */ temp = temp->super_class_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. + __ Beqz(temp, slow_path->GetEntryLabel()); + // Otherwise, compare the classes. + __ Bne(temp, cls.AsRegister<XRegister>(), &loop); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Walk over the class hierarchy to find a match. + Riscv64Label loop; + __ Bind(&loop); + __ Beq(temp, cls.AsRegister<XRegister>(), &done); + // /* HeapReference<Class> */ temp = temp->super_class_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. Otherwise, jump to the beginning of the loop. + __ Bnez(temp, &loop); + __ J(slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Do an exact check. + __ Beq(temp, cls.AsRegister<XRegister>(), &done); + // Otherwise, we need to check that the object's class is a non-primitive array. + // /* HeapReference<Class> */ temp = temp->component_type_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + component_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the component type is null, jump to the slow path to throw the exception. + __ Beqz(temp, slow_path->GetEntryLabel()); + // Otherwise, the object is indeed an array, further check that this component + // type is not a primitive type. + __ Loadhu(temp, temp, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Bnez(temp, slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + // We always go into the type check slow path for the unresolved check case. + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ J(slow_path->GetEntryLabel()); + break; + + case TypeCheckKind::kInterfaceCheck: { + // Avoid read barriers to improve performance of the fast path. We can not get false + // positives by doing this. False negatives are handled by the slow path. 
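+ // (A from-space class pointer may fail to match the to-space `cls`, but it can never
+ // spuriously match it, so a miss simply ends up in the slow path; compare the analogous
+ // reasoning in VisitArraySet above.)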
+ // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + XRegister temp2 = maybe_temp2_loc.AsRegister<XRegister>(); + XRegister temp3 = maybe_temp3_loc.AsRegister<XRegister>(); + // Iftable is never null. + __ Loadw(temp2, temp, array_length_offset); + // Loop through the iftable and check if any class matches. + Riscv64Label loop; + __ Bind(&loop); + __ Beqz(temp2, slow_path->GetEntryLabel()); + __ Lwu(temp3, temp, object_array_data_offset); + codegen_->MaybeUnpoisonHeapReference(temp3); + // Go to next interface. + __ Addi(temp, temp, 2 * kHeapReferenceSize); + __ Addi(temp2, temp2, -2); + // Compare the classes and continue the loop if they do not match. + __ Bne(temp3, cls.AsRegister<XRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); + break; + } + } + + __ Bind(&done); + __ Bind(slow_path->GetExitLabel()); +} + +void LocationsBuilderRISCV64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { + MemberOffset method_offset = + mirror::Class::EmbeddedVTableEntryOffset(instruction->GetIndex(), kRiscv64PointerSize); + __ Loadd(out, in, method_offset.SizeValue()); + } else { + uint32_t method_offset = dchecked_integral_cast<uint32_t>( + ImTable::OffsetOfElement(instruction->GetIndex(), kRiscv64PointerSize)); + __ Loadd(out, in, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value()); + __ Loadd(out, out, method_offset); + } +} + +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value(); +} + +void LocationsBuilderRISCV64::VisitClearException(HClearException* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorRISCV64::VisitClearException( + [[maybe_unused]] HClearException* instruction) { + __ Stored(Zero, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderRISCV64::VisitClinitCheck(HClinitCheck* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } + // Rely on the type initialization to save everything we need. 
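+ // (The clinit slow path presumably makes a SaveEverything runtime call, so no regular
+ // caller-save registers need to be reserved here.)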
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); +} + +void InstructionCodeGeneratorRISCV64::VisitClinitCheck(HClinitCheck* instruction) { + // We assume the class is not null. + SlowPathCodeRISCV64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64( + instruction->GetLoadClass(), instruction); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, + instruction->GetLocations()->InAt(0).AsRegister<XRegister>()); +} + +void LocationsBuilderRISCV64::VisitCompare(HCompare* instruction) { + DataType::Type in_type = instruction->InputAt(0)->GetType(); + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + switch (in_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type for compare operation " << in_type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitCompare(HCompare* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister result = locations->Out().AsRegister<XRegister>(); + DataType::Type in_type = instruction->InputAt(0)->GetType(); + + // 0 if: left == right + // 1 if: left > right + // -1 if: left < right + switch (in_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister left = locations->InAt(0).AsRegister<XRegister>(); + XRegister right = InputXRegisterOrZero(locations->InAt(1)); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Slt(tmp, left, right); + __ Slt(result, right, left); + __ Sub(result, result, tmp); + break; + } + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister left = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister right = locations->InAt(1).AsFpuRegister<FRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (instruction->IsGtBias()) { + // ((FLE l,r) ^ 1) - (FLT l,r); see `GenerateFpCondition()`. + FLe(tmp, left, right, in_type); + FLt(result, left, right, in_type); + __ Xori(tmp, tmp, 1); + __ Sub(result, tmp, result); + } else { + // ((FLE r,l) - 1) + (FLT r,l); see `GenerateFpCondition()`. 
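+ // Worked out: l > r -> (1 - 1) + 1 = 1; l == r -> (1 - 1) + 0 = 0;
+ // l < r -> (0 - 1) + 0 = -1; unordered -> (0 - 1) + 0 = -1 (lt bias treats NaN as "less").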
+ FLe(tmp, right, left, in_type); + FLt(result, right, left, in_type); + __ Addi(tmp, tmp, -1); + __ Add(result, result, tmp); + } + break; + } + + default: + LOG(FATAL) << "Unimplemented compare type " << in_type; + } +} + +void LocationsBuilderRISCV64::VisitConstructorFence(HConstructorFence* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitConstructorFence( + [[maybe_unused]] HConstructorFence* instruction) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + +void LocationsBuilderRISCV64::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RegisterLocation(kArtMethodRegister)); +} + +void InstructionCodeGeneratorRISCV64::VisitCurrentMethod( + [[maybe_unused]] HCurrentMethod* instruction) { + // Nothing to do, the method is already at its location. +} + +void LocationsBuilderRISCV64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* instruction) { + __ Loadw(instruction->GetLocations()->Out().AsRegister<XRegister>(), + SP, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); +} + +void LocationsBuilderRISCV64::VisitDeoptimize(HDeoptimize* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitDeoptimize(HDeoptimize* instruction) { + SlowPathCodeRISCV64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathRISCV64>(instruction); + GenerateTestAndBranch(instruction, + /* condition_input_index= */ 0, + slow_path->GetEntryLabel(), + /* false_target= */ nullptr); +} + +void LocationsBuilderRISCV64::VisitDiv(HDiv* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected div type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitDiv(HDiv* instruction) { + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case DataType::Type::kInt32: + case 
DataType::Type::kInt64: + GenerateDivRemIntegral(instruction); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + FDiv(dst, lhs, rhs, type); + break; + } + default: + LOG(FATAL) << "Unexpected div type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) { + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) { + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + Location value = instruction->GetLocations()->InAt(0); + + DataType::Type type = instruction->GetType(); + + if (!DataType::IsIntegralType(type)) { + LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; + UNREACHABLE(); + } + + if (value.IsConstant()) { + int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); + if (divisor == 0) { + __ J(slow_path->GetEntryLabel()); + } else { + // A division by a non-null constant is valid. We don't need to perform + // any check, so simply fall through. + } + } else { + __ Beqz(value.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + } +} + +void LocationsBuilderRISCV64::VisitDoubleConstant(HDoubleConstant* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitDoubleConstant( + [[maybe_unused]] HDoubleConstant* instruction) { + // Will be generated at use site. +} + +void LocationsBuilderRISCV64::VisitEqual(HEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitEqual(HEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitExit(HExit* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitExit([[maybe_unused]] HExit* instruction) {} + +void LocationsBuilderRISCV64::VisitFloatConstant(HFloatConstant* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitFloatConstant( + [[maybe_unused]] HFloatConstant* instruction) { + // Will be generated at use site. 
+} + +void LocationsBuilderRISCV64::VisitGoto(HGoto* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitGoto(HGoto* instruction) { + HandleGoto(instruction, instruction->GetSuccessor()); +} + +void LocationsBuilderRISCV64::VisitGreaterThan(HGreaterThan* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitGreaterThan(HGreaterThan* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitIf(HIf* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitIf(HIf* instruction) { + HBasicBlock* true_successor = instruction->IfTrueSuccessor(); + HBasicBlock* false_successor = instruction->IfFalseSuccessor(); + Riscv64Label* true_target = codegen_->GoesToNextBlock(instruction->GetBlock(), true_successor) + ? nullptr + : codegen_->GetLabelOf(true_successor); + Riscv64Label* false_target = codegen_->GoesToNextBlock(instruction->GetBlock(), false_successor) + ? nullptr + : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(instruction, /* condition_input_index= */ 0, true_target, false_target); +} + +void LocationsBuilderRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitPredicatedInstanceFieldGet( + HPredicatedInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitPredicatedInstanceFieldGet( + HPredicatedInstanceFieldGet* instruction) { + Riscv64Label finish; + LocationSummary* locations = instruction->GetLocations(); + XRegister target = locations->InAt(1).AsRegister<XRegister>(); + __ Beqz(target, &finish); + HandleFieldGet(instruction, instruction->GetFieldInfo()); + __ Bind(&finish); +} + +void LocationsBuilderRISCV64::VisitInstanceOf(HInstanceOf* instruction) { + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kAbstractClassCheck: + case TypeCheckKind::kClassHierarchyCheck: + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + break; + } + case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: + call_kind = LocationSummary::kCallOnSlowPath; + break; + case TypeCheckKind::kBitstringCheck: + break; + } + + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // The output does overlap inputs. + // Note that TypeCheckSlowPathRISCV64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Location::NoLocation() + : locations->InAt(1); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + Riscv64Label done; + SlowPathCodeRISCV64* slow_path = nullptr; + + // Return 0 if `obj` is null. + // Avoid this check if we know `obj` is not null. + if (instruction->MustDoNullCheck()) { + __ Mv(out, Zero); + __ Beqz(obj, &done); + } + + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Classes must be equal for the instanceof to succeed. + __ Xor(out, out, cls.AsRegister<XRegister>()); + __ Seqz(out, out); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // If the class is abstract, we eagerly fetch the super class of the + // object to avoid doing a comparison we know will fail. 
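+ // The loop below walks up the superclass chain; reaching a null super class leaves 0
+ // (the null reference itself) in `out` as the "false" result, while finding `cls` stores 1.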
+ Riscv64Label loop; + __ Bind(&loop); + // /* HeapReference<Class> */ out = out->super_class_ + GenerateReferenceLoadOneRegister( + instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option); + // If `out` is null, we use it for the result, and jump to `done`. + __ Beqz(out, &done); + __ Bne(out, cls.AsRegister<XRegister>(), &loop); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Walk over the class hierarchy to find a match. + Riscv64Label loop, success; + __ Bind(&loop); + __ Beq(out, cls.AsRegister<XRegister>(), &success); + // /* HeapReference<Class> */ out = out->super_class_ + GenerateReferenceLoadOneRegister( + instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option); + __ Bnez(out, &loop); + // If `out` is null, we use it for the result, and jump to `done`. + __ J(&done); + __ Bind(&success); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // FIXME(riscv64): We currently have marking entrypoints for 29 registers. + // We need to either store entrypoint for register `N` in entry `N-A` where + // `A` can be up to 5 (Zero, RA, SP, GP, TP are not valid registers for + // marking), or define two more entrypoints, or request an additional temp + // from the register allocator instead of using a scratch register. + ScratchRegisterScope srs(GetAssembler()); + Location tmp = Location::RegisterLocation(srs.AllocateXRegister()); + // /* HeapReference<Class> */ tmp = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, tmp, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Do an exact check. + __ LoadConst32(out, 1); + __ Beq(tmp.AsRegister<XRegister>(), cls.AsRegister<XRegister>(), &done); + // Otherwise, we need to check that the object's class is a non-primitive array. + // /* HeapReference<Class> */ out = out->component_type_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, tmp, component_offset, maybe_temp_loc, read_barrier_option); + // If `out` is null, we use it for the result, and jump to `done`. + __ Beqz(out, &done); + __ Loadhu(out, out, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Seqz(out, out); + break; + } + + case TypeCheckKind::kArrayCheck: { + // No read barrier since the slow path will retry upon failure. + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier); + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) + TypeCheckSlowPathRISCV64(instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + __ Bne(out, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. 
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+ // require to assign fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, /* is_fatal= */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Seqz(out, out);
+ break;
+ }
+ }
+
+ __ Bind(&done);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitIntConstant(HIntConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntConstant([[maybe_unused]] HIntConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ // The trampoline uses the same calling convention as dex calling conventions, except
+ // instead of loading arg0/A0 with the target Method*, arg0/A0 will contain the method_idx.
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ codegen_->GenerateInvokeUnresolvedRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+ HandleInvoke(instruction);
+ // Use T0 as the hidden argument for `art_quick_imt_conflict_trampoline`.
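+ // T0 is either reserved as the last input (when the hidden argument is already an input
+ // of the invoke, i.e. kRecursive) or added as an extra temp that is filled in just before
+ // the call in the code generator below.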
+ if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) { + instruction->GetLocations()->SetInAt(instruction->GetNumberOfArguments() - 1, + Location::RegisterLocation(T0)); + } else { + instruction->GetLocations()->AddTemp(Location::RegisterLocation(T0)); + } +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister temp = locations->GetTemp(0).AsRegister<XRegister>(); + XRegister receiver = locations->InAt(0).AsRegister<XRegister>(); + int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize); + + // /* HeapReference<Class> */ temp = receiver->klass_ + __ Loadwu(temp, receiver, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + codegen_->MaybeUnpoisonHeapReference(temp); + + // If we're compiling baseline, update the inline cache. + codegen_->MaybeGenerateInlineCacheCheck(instruction, temp); + + // The register T0 is required to be used for the hidden argument in + // `art_quick_imt_conflict_trampoline`. + if (instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive && + instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) { + Location hidden_reg = instruction->GetLocations()->GetTemp(1); + // Load the resolved interface method in the hidden argument register T0. + DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>()); + codegen_->LoadMethod(instruction->GetHiddenArgumentLoadKind(), hidden_reg, instruction); + } + + __ Loadd(temp, temp, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value()); + uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( + instruction->GetImtIndex(), kRiscv64PointerSize)); + // temp = temp->GetImtEntryAt(method_offset); + __ Loadd(temp, temp, method_offset); + if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) { + // We pass the method from the IMT in case of a conflict. This will ensure + // we go into the runtime to resolve the actual method. + Location hidden_reg = instruction->GetLocations()->GetTemp(1); + DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>()); + __ Mv(hidden_reg.AsRegister<XRegister>(), temp); + } + // RA = temp->GetEntryPoint(); + __ Loadd(RA, temp, entry_point.Int32Value()); + + // RA(); + __ Jalr(RA); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void LocationsBuilderRISCV64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!instruction->IsStaticWithExplicitClinitCheck()); + + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + + if (instruction->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { + CriticalNativeCallingConventionVisitorRiscv64 calling_convention_visitor( + /*for_register_allocation=*/ true); + CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor); + } else { + HandleInvoke(instruction); + } +} + +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorRISCV64* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorRISCV64 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeStaticOrDirect( + HInvokeStaticOrDirect* instruction) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!instruction->IsStaticWithExplicitClinitCheck()); + + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + + LocationSummary* locations = instruction->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + instruction, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); +} + +void LocationsBuilderRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) { + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) { + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + + codegen_->GenerateVirtualCall(instruction, instruction->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) { + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) { + codegen_->GenerateInvokePolymorphicCall(instruction); +} + +void LocationsBuilderRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) { + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) { + codegen_->GenerateInvokeCustomCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLessThan(HLessThan* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitLessThan(HLessThan* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadClass(HLoadClass* instruction) { + HLoadClass::LoadKind load_kind = instruction->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + DCHECK(loc.Equals(calling_convention.GetReturnLocation(DataType::Type::kReference))); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(instruction, loc, loc); + return; + } + DCHECK_EQ(instruction->NeedsAccessCheck(), + load_kind 
== HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage); + + const bool requires_read_barrier = gUseReadBarrier && !instruction->IsInBootImage(); + LocationSummary::CallKind call_kind = (instruction->NeedsEnvironment() || requires_read_barrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !instruction->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + if (load_kind == HLoadClass::LoadKind::kReferrersClass) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } +} + +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction) + NO_THREAD_SAFETY_ANALYSIS { + HLoadClass::LoadKind load_kind = instruction->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { + codegen_->GenerateLoadClassRuntimeCall(instruction); + return; + } + DCHECK_EQ(instruction->NeedsAccessCheck(), + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage); + + LocationSummary* locations = instruction->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + const ReadBarrierOption read_barrier_option = + instruction->IsInBootImage() ? 
kWithoutReadBarrier : GetCompilerReadBarrierOption(); + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!instruction->CanCallRuntime()); + DCHECK(!instruction->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + XRegister current_method = locations->InAt(0).AsRegister<XRegister>(); + GenerateGcRootFieldLoad(instruction, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageTypePatch(instruction->GetDexFile(), instruction->GetTypeIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageTypePatch( + instruction->GetDexFile(), instruction->GetTypeIndex(), info_high); + codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out); + break; + } + case HLoadClass::LoadKind::kBootImageRelRo: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageRelRoPatch(boot_image_offset); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); + codegen_->EmitPcRelativeLwuPlaceholder(info_low, out, out); + break; + } + case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kBssEntryPublic: + case HLoadClass::LoadKind::kBssEntryPackage: { + CodeGeneratorRISCV64::PcRelativePatchInfo* bss_info_high = + codegen_->NewTypeBssEntryPatch(instruction); + codegen_->EmitPcRelativeAuipcPlaceholder(bss_info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch( + instruction, bss_info_high); + GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + read_barrier_option, + &info_low->label); + generate_null_check = true; + break; + } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kJitTableAddress: + __ Loadwu(out, codegen_->DeduplicateJitClassLiteral(instruction->GetDexFile(), + instruction->GetTypeIndex(), + instruction->GetClass())); + GenerateGcRootFieldLoad(instruction, out_loc, out, /* offset= */ 0, read_barrier_option); + break; + case HLoadClass::LoadKind::kRuntimeCall: + case HLoadClass::LoadKind::kInvalid: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } + + if (generate_null_check || instruction->MustGenerateClinitCheck()) { + DCHECK(instruction->CanCallRuntime()); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(instruction, instruction); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (instruction->MustGenerateClinitCheck()) { + 
GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } + } +} + +void LocationsBuilderRISCV64::VisitLoadException(HLoadException* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadException(HLoadException* instruction) { + XRegister out = instruction->GetLocations()->Out().AsRegister<XRegister>(); + __ Loadwu(out, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(instruction, loc, loc); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) { + codegen_->GenerateLoadMethodHandleRuntimeCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(instruction, loc, loc); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) { + codegen_->GenerateLoadMethodTypeRuntimeCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadString(HLoadString* instruction) { + HLoadString::LoadKind load_kind = instruction->GetLoadKind(); + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } + } +} + +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. 
+void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction) + NO_THREAD_SAFETY_ANALYSIS { + HLoadString::LoadKind load_kind = instruction->GetLoadKind(); + LocationSummary* locations = instruction->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewBootImageStringPatch( + instruction->GetDexFile(), instruction->GetStringIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch( + instruction->GetDexFile(), instruction->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out); + return; + } + case HLoadString::LoadKind::kBootImageRelRo: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageRelRoPatch(boot_image_offset); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); + codegen_->EmitPcRelativeLwuPlaceholder(info_low, out, out); + return; + } + case HLoadString::LoadKind::kBssEntry: { + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch( + instruction->GetDexFile(), instruction->GetStringIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewStringBssEntryPatch( + instruction->GetDexFile(), instruction->GetStringIndex(), info_high); + GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + GetCompilerReadBarrierOption(), + &info_low->label); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) LoadStringSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetString().Get()); + DCHECK_NE(address, 0u); + __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + return; + } + case HLoadString::LoadKind::kJitTableAddress: + __ Loadwu( + out, + codegen_->DeduplicateJitStringLiteral( + instruction->GetDexFile(), instruction->GetStringIndex(), instruction->GetString())); + GenerateGcRootFieldLoad(instruction, out_loc, out, 0, GetCompilerReadBarrierOption()); + return; + default: + break; + } + + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); + InvokeRuntimeCallingConvention calling_convention; + DCHECK(calling_convention.GetReturnLocation(DataType::Type::kReference).Equals(out_loc)); + __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetStringIndex().index_); + codegen_->InvokeRuntime(kQuickResolveString, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); +} + +void LocationsBuilderRISCV64::VisitLongConstant(HLongConstant* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + 
locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitLongConstant( + [[maybe_unused]] HLongConstant* instruction) { + // Will be generated at use site. +} + +void LocationsBuilderRISCV64::VisitMax(HMax* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitMax(HMax* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) { + codegen_->GenerateMemoryBarrier(instruction->GetBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitMin(HMin* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitMin(HMin* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) { + codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, + instruction, + instruction->GetDexPc()); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } +} + +void LocationsBuilderRISCV64::VisitMul(HMul* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitMul(HMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + __ Mulw(locations->Out().AsRegister<XRegister>(), + locations->InAt(0).AsRegister<XRegister>(), + locations->InAt(1).AsRegister<XRegister>()); + break; + + case DataType::Type::kInt64: + __ Mul(locations->Out().AsRegister<XRegister>(), + locations->InAt(0).AsRegister<XRegister>(), + locations->InAt(1).AsRegister<XRegister>()); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FMul(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + locations->InAt(1).AsFpuRegister<FRegister>(), + instruction->GetResultType()); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType(); + } +} + +void LocationsBuilderRISCV64::VisitNeg(HNeg* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitNeg(HNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + __ NegW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + case DataType::Type::kInt64: + __ Neg(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FNeg(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + instruction->GetResultType()); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void 
LocationsBuilderRISCV64::VisitNewArray(HNewArray* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); +} + +void InstructionCodeGeneratorRISCV64::VisitNewArray(HNewArray* instruction) { + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderRISCV64::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); +} + +void InstructionCodeGeneratorRISCV64::VisitNewInstance(HNewInstance* instruction) { + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + +void LocationsBuilderRISCV64::VisitNop(HNop* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitNop([[maybe_unused]] HNop* instruction) { + // The environment recording already happened in CodeGenerator::Compile. +} + +void LocationsBuilderRISCV64::VisitNot(HNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitNot(HNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + __ Not(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + default: + LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitNotEqual(HNotEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitNotEqual(HNotEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitNullConstant(HNullConstant* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitNullConstant( + [[maybe_unused]] HNullConstant* instruction) { + // Will be generated at use site. 
+} + +void LocationsBuilderRISCV64::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitNullCheck(HNullCheck* instruction) { + codegen_->GenerateNullCheck(instruction); +} + +void LocationsBuilderRISCV64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) { + int32_t lower_bound = instruction->GetStartValue(); + uint32_t num_entries = instruction->GetNumEntries(); + LocationSummary* locations = instruction->GetLocations(); + XRegister value = locations->InAt(0).AsRegister<XRegister>(); + HBasicBlock* switch_block = instruction->GetBlock(); + HBasicBlock* default_block = instruction->GetDefaultBlock(); + + // Prepare a temporary register and an adjusted zero-based value. + ScratchRegisterScope srs(GetAssembler()); + XRegister temp = srs.AllocateXRegister(); + XRegister adjusted = value; + if (lower_bound != 0) { + adjusted = temp; + __ AddConst32(temp, value, -lower_bound); + } + + // Jump to the default block if the index is out of the packed switch value range. + // Note: We could save one instruction for `num_entries == 1` with BNEZ but the + // `HInstructionBuilder` transforms that case to an `HIf`, so let's keep the code simple. + CHECK_NE(num_entries, 0u); // `HInstructionBuilder` creates a `HGoto` for empty packed-switch. + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister temp2 = srs2.AllocateXRegister(); + __ LoadConst32(temp2, num_entries); + __ Bgeu(adjusted, temp2, codegen_->GetLabelOf(default_block)); // Can clobber `TMP` if taken. + } + + if (num_entries >= kPackedSwitchCompareJumpThreshold) { + GenTableBasedPackedSwitch(adjusted, temp, num_entries, switch_block); + } else { + GenPackedSwitchWithCompares(adjusted, temp, num_entries, switch_block); + } +} + +void LocationsBuilderRISCV64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorRISCV64::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. 
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderRISCV64::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorRISCV64::VisitParameterValue( + [[maybe_unused]] HParameterValue* instruction) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderRISCV64::VisitPhi(HPhi* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorRISCV64::VisitPhi([[maybe_unused]] HPhi* instruction) { + LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderRISCV64::VisitRem(HRem* instruction) { + DataType::Type type = instruction->GetResultType(); + LocationSummary::CallKind call_kind = + DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + break; + } + + default: + LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitRem(HRem* instruction) { + DataType::Type type = instruction->GetType(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateDivRemIntegral(instruction); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + QuickEntrypointEnum entrypoint = + (type == DataType::Type::kFloat32) ? 
kQuickFmodf : kQuickFmod; + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); + if (type == DataType::Type::kFloat32) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } + break; + } + default: + LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitReturn(HReturn* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type return_type = instruction->InputAt(0)->GetType(); + DCHECK_NE(return_type, DataType::Type::kVoid); + locations->SetInAt(0, Riscv64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorRISCV64::VisitReturn(HReturn* instruction) { + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put a floating point return value + // in both floating point and core return registers. + switch (instruction->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + __ FMvXW(A0, FA0); + break; + case DataType::Type::kFloat64: + __ FMvXD(A0, FA0); + break; + default: + break; + } + } + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderRISCV64::VisitReturnVoid(HReturnVoid* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderRISCV64::VisitRor(HRor* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitRor(HRor* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitShl(HShl* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitShl(HShl* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitShr(HShr* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitShr(HShr* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(A0)); +} + +void InstructionCodeGeneratorRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ LoadConst32(A0, instruction->GetFormat()->GetValue()); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + +void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + 
FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitSelect(HSelect* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitSelect(HSelect* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + // In suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : + RegisterSet::Empty()); +} + +void InstructionCodeGeneratorRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. 
+ return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void LocationsBuilderRISCV64::VisitThrow(HThrow* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void LocationsBuilderRISCV64::VisitTryBoundary(HTryBoundary* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitTryBoundary(HTryBoundary* instruction) { + HBasicBlock* successor = instruction->GetNormalFlowSuccessor(); + if (!successor->IsExitBlock()) { + HandleGoto(instruction, successor); + } +} + +void LocationsBuilderRISCV64::VisitTypeConversion(HTypeConversion* instruction) { + DataType::Type input_type = instruction->GetInputType(); + DataType::Type result_type = instruction->GetResultType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + + if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || + (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { + LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; + } + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + if (DataType::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + + if (DataType::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorRISCV64::VisitTypeConversion(HTypeConversion* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DataType::Type result_type = instruction->GetResultType(); + DataType::Type input_type = instruction->GetInputType(); + + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { + XRegister dst = locations->Out().AsRegister<XRegister>(); + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + switch (result_type) { + case DataType::Type::kUint8: + __ Andi(dst, src, 0xFF); + break; + case DataType::Type::kInt8: + __ SextB(dst, src); + break; + case DataType::Type::kUint16: + __ ZextH(dst, src); + break; + case DataType::Type::kInt16: + __ SextH(dst, src); + break; + case DataType::Type::kInt32: + case DataType::Type::kInt64: + // Sign-extend 32-bit int into bits 32 through 63 for int-to-long and long-to-int + // conversions, except when the input and output registers are the same and we are not + // converting longs to shorter types. In these cases, do nothing. 
+ if ((input_type == DataType::Type::kInt64) || (dst != src)) { + __ Addiw(dst, src, 0); + } + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + UNREACHABLE(); + } + } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + if (input_type == DataType::Type::kInt64) { + if (result_type == DataType::Type::kFloat32) { + __ FCvtSL(dst, src, FPRoundingMode::kRNE); + } else { + __ FCvtDL(dst, src, FPRoundingMode::kRNE); + } + } else { + if (result_type == DataType::Type::kFloat32) { + __ FCvtSW(dst, src, FPRoundingMode::kRNE); + } else { + __ FCvtDW(dst, src); // No rounding. + } + } + } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { + CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); + XRegister dst = locations->Out().AsRegister<XRegister>(); + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + if (result_type == DataType::Type::kInt64) { + if (input_type == DataType::Type::kFloat32) { + __ FCvtLS(dst, src, FPRoundingMode::kRTZ); + } else { + __ FCvtLD(dst, src, FPRoundingMode::kRTZ); + } + } else { + if (input_type == DataType::Type::kFloat32) { + __ FCvtWS(dst, src, FPRoundingMode::kRTZ); + } else { + __ FCvtWD(dst, src, FPRoundingMode::kRTZ); + } + } + } else if (DataType::IsFloatingPointType(result_type) && + DataType::IsFloatingPointType(input_type)) { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + if (result_type == DataType::Type::kFloat32) { + __ FCvtSD(dst, src); + } else { + __ FCvtDS(dst, src); + } + } else { + LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type + << " to " << result_type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitUShr(HUShr* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitUShr(HUShr* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecReduce(HVecReduce* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecReduce(HVecReduce* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecCnv(HVecCnv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecCnv(HVecCnv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void 
LocationsBuilderRISCV64::VisitVecNeg(HVecNeg* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecNeg(HVecNeg* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAbs(HVecAbs* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAbs(HVecAbs* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecNot(HVecNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecNot(HVecNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAdd(HVecAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAdd(HVecAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSub(HVecSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSub(HVecSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMul(HVecMul* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMul(HVecMul* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecDiv(HVecDiv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecDiv(HVecDiv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMin(HVecMin* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMin(HVecMin* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMax(HVecMax* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMax(HVecMax* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAnd(HVecAnd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAnd(HVecAnd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAndNot(HVecAndNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAndNot(HVecAndNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecOr(HVecOr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecOr(HVecOr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecXor(HVecXor* instruction) { + UNUSED(instruction); + 
LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecXor(HVecXor* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecShl(HVecShl* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecShl(HVecShl* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecShr(HVecShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecShr(HVecShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecUShr(HVecUShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecUShr(HVecUShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMultiplyAccumulate( + HVecMultiplyAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecDotProd(HVecDotProd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecDotProd(HVecDotProd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecLoad(HVecLoad* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecLoad(HVecLoad* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecStore(HVecStore* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecStore(HVecStore* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) { + 
UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecCondition(HVecCondition* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecCondition(HVecCondition* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredNot(HVecPredNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredNot(HVecPredNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +namespace detail { + +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; + +} // namespace detail + +CodeGeneratorRISCV64::CodeGeneratorRISCV64(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) + : CodeGenerator(graph, + kNumberOfXRegisters, + kNumberOfFRegisters, + /*number_of_register_pairs=*/ 0u, + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)), + ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)), + compiler_options, + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures()), + location_builder_(graph, this), + instruction_visitor_(graph, this), + block_labels_(nullptr), + move_resolver_(graph->GetAllocator(), this), + uint32_literals_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + 
boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + // Always mark the RA register to be saved. + AddAllocatedRegister(Location::RegisterLocation(RA)); +} + +void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) { + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + ScratchRegisterScope srs(GetAssembler()); + XRegister method = is_frame_entry ? kArtMethodRegister : srs.AllocateXRegister(); + if (!is_frame_entry) { + __ Loadd(method, SP, 0); + } + XRegister counter = srs.AllocateXRegister(); + __ Loadhu(counter, method, ArtMethod::HotnessCountOffset().Int32Value()); + Riscv64Label done; + DCHECK_EQ(0u, interpreter::kNterpHotnessValue); + __ Beqz(counter, &done); // Can clobber `TMP` if taken. + __ Addi(counter, counter, -1); + // We may not have another scratch register available for `Storeh`()`, + // so we must use the `Sh()` function directly. + static_assert(IsInt<12>(ArtMethod::HotnessCountOffset().Int32Value())); + __ Sh(counter, method, ArtMethod::HotnessCountOffset().Int32Value()); + __ Bind(&done); + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64(); + AddSlowPath(slow_path); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + DCHECK(!HasEmptyFrame()); + uint64_t address = reinterpret_cast64<uint64_t>(info); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ LoadConst64(tmp, address); + XRegister counter = srs.AllocateXRegister(); + __ Loadhu(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()); + __ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken. + __ Addi(counter, counter, -1); + // We do not have another scratch register available for `Storeh`()`, + // so we must use the `Sh()` function directly. + static_assert(IsInt<12>(ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Sh(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()); + __ Bind(slow_path->GetExitLabel()); + } +} + +bool CodeGeneratorRISCV64::CanUseImplicitSuspendCheck() const { + // TODO(riscv64): Implement implicit suspend checks to reduce code size. + return false; +} + +void CodeGeneratorRISCV64::GenerateMemoryBarrier(MemBarrierKind kind) { + switch (kind) { + case MemBarrierKind::kAnyAny: + __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceRead | kFenceWrite); + break; + case MemBarrierKind::kAnyStore: + __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceWrite); + break; + case MemBarrierKind::kLoadAny: + __ Fence(/*pred=*/ kFenceRead, /*succ=*/ kFenceRead | kFenceWrite); + break; + case MemBarrierKind::kStoreStore: + __ Fence(/*pred=*/ kFenceWrite, /*succ=*/ kFenceWrite); + break; + + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + UNREACHABLE(); + } +} + +void CodeGeneratorRISCV64::GenerateFrameEntry() { + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. 
+ // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + Riscv64Label resolution; + Riscv64Label memory_barrier; + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a clinit check before re-entering this code. + __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()); + + // We shall load the full 32-bit status word with sign-extension and compare as unsigned + // to sign-extended shifted status values. This yields the same comparison as loading and + // materializing unsigned but the constant is materialized with a single LUI instruction. + __ Loadw(tmp, tmp2, mirror::Class::StatusOffset().SizeValue()); // Sign-extended. + + // Check if we're visibly initialized. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>()); + __ Bgeu(tmp, tmp2, &frame_entry_label_); // Can clobber `TMP` if taken. + + // Check if we're initialized and jump to code that does a memory barrier if so. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitialized>()); + __ Bgeu(tmp, tmp2, &memory_barrier); // Can clobber `TMP` if taken. + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitializing>()); + __ Bltu(tmp, tmp2, &resolution); // Can clobber `TMP` if taken. + + __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()); + __ Loadw(tmp, tmp2, mirror::Class::ClinitThreadIdOffset().Int32Value()); + __ Loadw(tmp2, TR, Thread::TidOffset<kRiscv64PointerSize>().Int32Value()); + __ Beq(tmp, tmp2, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset64 entrypoint_offset = + GetThreadOffset<kRiscv64PointerSize>(kQuickQuickResolutionTrampoline); + __ Loadd(tmp, TR, entrypoint_offset.Int32Value()); + __ Jr(tmp); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + __ Bind(&frame_entry_label_); + + bool do_overflow_check = + FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kRiscv64) || !IsLeafMethod(); + + if (do_overflow_check) { + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Loadw( + Zero, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kRiscv64))); + RecordPcInfo(nullptr, 0); + } + + if (!HasEmptyFrame()) { + // Make sure the frame size isn't unreasonably large. + if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kRiscv64)) { + LOG(FATAL) << "Stack frame larger than " + << GetStackOverflowReservedBytes(InstructionSet::kRiscv64) << " bytes"; + } + + // Spill callee-saved registers. 
+ + uint32_t frame_size = GetFrameSize(); + + IncreaseFrame(frame_size); + + uint32_t offset = frame_size; + for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) { + --i; + XRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ Stored(reg, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(reg), offset); + } + } + + for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) { + --i; + FRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ FStored(reg, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(reg), offset); + } + } + + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ Stored(kArtMethodRegister, SP, 0); + } + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ Storew(Zero, SP, GetStackOffsetOfShouldDeoptimizeFlag()); + } + } + MaybeIncrementHotness(/*is_frame_entry=*/ true); +} + +void CodeGeneratorRISCV64::GenerateFrameExit() { + __ cfi().RememberState(); + + if (!HasEmptyFrame()) { + // Restore callee-saved registers. + + // For better instruction scheduling restore RA before other registers. + uint32_t offset = GetFrameSize(); + for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) { + --i; + XRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ Loadd(reg, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(reg)); + } + } + + for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) { + --i; + FRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ FLoadd(reg, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Fp(reg)); + } + } + + DecreaseFrame(GetFrameSize()); + } + + __ Jr(RA); + + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(GetFrameSize()); +} + +void CodeGeneratorRISCV64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } + +void CodeGeneratorRISCV64::MoveConstant(Location destination, int32_t value) { + DCHECK(destination.IsRegister()); + __ LoadConst32(destination.AsRegister<XRegister>(), value); +} + +void CodeGeneratorRISCV64::MoveLocation(Location destination, + Location source, + DataType::Type dst_type) { + if (source.Equals(destination)) { + return; + } + + // A valid move type can always be inferred from the destination and source locations. + // When moving from and to a register, the `dst_type` can be used to generate 32-bit instead + // of 64-bit moves but it's generally OK to use 64-bit moves for 32-bit values in registers. + bool unspecified_type = (dst_type == DataType::Type::kVoid); + // TODO(riscv64): Is the destination type known in all cases? + // TODO(riscv64): Can unspecified `dst_type` move 32-bit GPR to FPR without NaN-boxing? + CHECK(!unspecified_type); + + if (destination.IsRegister() || destination.IsFpuRegister()) { + if (unspecified_type) { + HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; + if (source.IsStackSlot() || + (src_cst != nullptr && + (src_cst->IsIntConstant() || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { + // For stack slots and 32-bit constants, a 32-bit type is appropriate. + dst_type = destination.IsRegister() ? 
DataType::Type::kInt32 : DataType::Type::kFloat32; + } else { + // If the source is a double stack slot or a 64-bit constant, a 64-bit type + // is appropriate. Else the source is a register, and since the type has not + // been specified, we chose a 64-bit type to force a 64-bit move. + dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; + } + } + DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); + + if (source.IsStackSlot() || source.IsDoubleStackSlot()) { + // Move to GPR/FPR from stack + if (DataType::IsFloatingPointType(dst_type)) { + if (DataType::Is64BitType(dst_type)) { + __ FLoadd(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); + } else { + __ FLoadw(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); + } + } else { + if (DataType::Is64BitType(dst_type)) { + __ Loadd(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } else if (dst_type == DataType::Type::kReference) { + __ Loadwu(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } else { + __ Loadw(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } + } + } else if (source.IsConstant()) { + // Move to GPR/FPR from constant + // TODO(riscv64): Consider using literals for difficult-to-materialize 64-bit constants. + int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); + ScratchRegisterScope srs(GetAssembler()); + XRegister gpr = DataType::IsFloatingPointType(dst_type) + ? srs.AllocateXRegister() + : destination.AsRegister<XRegister>(); + if (DataType::IsFloatingPointType(dst_type) && value == 0) { + gpr = Zero; // Note: The scratch register allocated above shall not be used. + } else { + // Note: For `float` we load the sign-extended value here as it can sometimes yield + // a shorter instruction sequence. The higher 32 bits shall be ignored during the + // transfer to FP reg and the result shall be correctly NaN-boxed. 
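        // (On RV64, FMvWX transfers only the low 32 bits and NaN-boxes the upper half of the
        // FP register, so the sign-extended high bits of the GPR are harmless here.)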
+ __ LoadConst64(gpr, value); + } + if (dst_type == DataType::Type::kFloat32) { + __ FMvWX(destination.AsFpuRegister<FRegister>(), gpr); + } else if (dst_type == DataType::Type::kFloat64) { + __ FMvDX(destination.AsFpuRegister<FRegister>(), gpr); + } + } else if (source.IsRegister()) { + if (destination.IsRegister()) { + // Move to GPR from GPR + __ Mv(destination.AsRegister<XRegister>(), source.AsRegister<XRegister>()); + } else { + DCHECK(destination.IsFpuRegister()); + if (DataType::Is64BitType(dst_type)) { + __ FMvDX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>()); + } else { + __ FMvWX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>()); + } + } + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + if (GetGraph()->HasSIMD()) { + LOG(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } else { + // Move to FPR from FPR + if (dst_type == DataType::Type::kFloat32) { + __ FMvS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); + } else { + DCHECK_EQ(dst_type, DataType::Type::kFloat64); + __ FMvD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); + } + } + } else { + DCHECK(destination.IsRegister()); + if (DataType::Is64BitType(dst_type)) { + __ FMvXD(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>()); + } else { + __ FMvXW(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>()); + } + } + } + } else if (destination.IsSIMDStackSlot()) { + LOG(FATAL) << "SIMD is unsupported"; + UNREACHABLE(); + } else { // The destination is not a register. It must be a stack slot. + DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); + if (source.IsRegister() || source.IsFpuRegister()) { + if (unspecified_type) { + if (source.IsRegister()) { + dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; + } else { + dst_type = + destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; + } + } + DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && + (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); + // Move to stack from GPR/FPR + if (DataType::Is64BitType(dst_type)) { + if (source.IsRegister()) { + __ Stored(source.AsRegister<XRegister>(), SP, destination.GetStackIndex()); + } else { + __ FStored(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); + } + } else { + if (source.IsRegister()) { + __ Storew(source.AsRegister<XRegister>(), SP, destination.GetStackIndex()); + } else { + __ FStorew(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); + } + } + } else if (source.IsConstant()) { + // Move to stack from constant + int64_t value = GetInt64ValueOf(source.GetConstant()); + ScratchRegisterScope srs(GetAssembler()); + XRegister gpr = (value != 0) ? 
srs.AllocateXRegister() : Zero; + if (value != 0) { + __ LoadConst64(gpr, value); + } + if (destination.IsStackSlot()) { + __ Storew(gpr, SP, destination.GetStackIndex()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ Stored(gpr, SP, destination.GetStackIndex()); + } + } else { + DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); + DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); + // Move to stack from stack + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (destination.IsStackSlot()) { + __ Loadw(tmp, SP, source.GetStackIndex()); + __ Storew(tmp, SP, destination.GetStackIndex()); + } else { + __ Loadd(tmp, SP, source.GetStackIndex()); + __ Stored(tmp, SP, destination.GetStackIndex()); + } + } + } +} + +void CodeGeneratorRISCV64::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + +void CodeGeneratorRISCV64::SetupBlockedRegisters() const { + // ZERO, GP, SP, RA, TP and TR(S1) are reserved and can't be allocated. + blocked_core_registers_[Zero] = true; + blocked_core_registers_[GP] = true; + blocked_core_registers_[SP] = true; + blocked_core_registers_[RA] = true; + blocked_core_registers_[TP] = true; + blocked_core_registers_[TR] = true; // ART Thread register. + + // TMP(T6), TMP2(T5) and FTMP(FT11) are used as temporary/scratch registers. + blocked_core_registers_[TMP] = true; + blocked_core_registers_[TMP2] = true; + blocked_fpu_registers_[FTMP] = true; + + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } +} + +size_t CodeGeneratorRISCV64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ Stored(XRegister(reg_id), SP, stack_index); + return kRiscv64DoublewordSize; +} + +size_t CodeGeneratorRISCV64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ Loadd(XRegister(reg_id), SP, stack_index); + return kRiscv64DoublewordSize; +} + +size_t CodeGeneratorRISCV64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + if (GetGraph()->HasSIMD()) { + // TODO(riscv64): RISC-V vector extension. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } + __ FStored(FRegister(reg_id), SP, stack_index); + return kRiscv64FloatRegSizeInBytes; +} + +size_t CodeGeneratorRISCV64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + if (GetGraph()->HasSIMD()) { + // TODO(riscv64): RISC-V vector extension. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } + __ FLoadd(FRegister(reg_id), SP, stack_index); + return kRiscv64FloatRegSizeInBytes; +} + +void CodeGeneratorRISCV64::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << XRegister(reg); +} + +void CodeGeneratorRISCV64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << FRegister(reg); +} + +void CodeGeneratorRISCV64::Finalize() { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. 
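  // FinalizeCode() may have replaced out-of-range short branches with longer sequences,
  // shifting the instructions that follow them, so each recorded native PC is remapped
  // through GetAdjustedPosition().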
+ StackMapStream* stack_map_stream = GetStackMapStream(); + for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream->SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& entry : *disasm_info_->GetInstructionIntervals()) { + entry.second.start = __ GetAdjustedPosition(entry.second.start); + entry.second.end = __ GetAdjustedPosition(entry.second.end); + } + for (auto& entry : *disasm_info_->GetSlowPathIntervals()) { + entry.code_interval.start = __ GetAdjustedPosition(entry.code_interval.start); + entry.code_interval.end = __ GetAdjustedPosition(entry.code_interval.end); + } + } +} + +// Generate code to invoke a runtime entry point. +void CodeGeneratorRISCV64::InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + + ThreadOffset64 entrypoint_offset = GetThreadOffset<kRiscv64PointerSize>(entrypoint); + + // TODO(riscv64): Reduce code size for AOT by using shared trampolines for slow path + // runtime calls across the entire oat file. + __ Loadd(RA, TR, entrypoint_offset.Int32Value()); + __ Jalr(RA); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } +} + +// Generate code to invoke a runtime entry point, but do not record +// PC-related information in a stack map. 
+void CodeGeneratorRISCV64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + __ Loadd(RA, TR, entry_point_offset); + __ Jalr(RA); +} + +void CodeGeneratorRISCV64::IncreaseFrame(size_t adjustment) { + int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment); + __ AddConst64(SP, SP, -adjustment32); + GetAssembler()->cfi().AdjustCFAOffset(adjustment32); +} + +void CodeGeneratorRISCV64::DecreaseFrame(size_t adjustment) { + int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment); + __ AddConst64(SP, SP, adjustment32); + GetAssembler()->cfi().AdjustCFAOffset(-adjustment32); +} + +void CodeGeneratorRISCV64::GenerateNop() { + __ Nop(); +} + +void CodeGeneratorRISCV64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ Lw(Zero, obj.AsRegister<XRegister>(), 0); + RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void CodeGeneratorRISCV64::GenerateExplicitNullCheck(HNullCheck* instruction) { + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathRISCV64(instruction); + AddSlowPath(slow_path); + + Location obj = instruction->GetLocations()->InAt(0); + + __ Beqz(obj.AsRegister<XRegister>(), slow_path->GetEntryLabel()); +} + +HLoadString::LoadKind CodeGeneratorRISCV64::GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) { + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageRelRo: + case HLoadString::LoadKind::kBssEntry: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kJitBootImageAddress: + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kRuntimeCall: + break; + } + return desired_string_load_kind; +} + +HLoadClass::LoadKind CodeGeneratorRISCV64::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kInvalid: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageRelRo: + case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kBssEntryPublic: + case HLoadClass::LoadKind::kBssEntryPackage: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kJitBootImageAddress: + case HLoadClass::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kRuntimeCall: + break; + } + return desired_class_load_kind; +} + +HInvokeStaticOrDirect::DispatchInfo CodeGeneratorRISCV64::GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) { + UNUSED(method); + // On RISCV64 we support all dispatch types. 
+ return desired_dispatch_info; +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_other_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_other_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageMethodPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewMethodBssEntryPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageTypePatch( + const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageJniEntrypointPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_jni_entrypoint_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewTypeBssEntryPatch( + HLoadClass* load_class, + const PcRelativePatchInfo* info_high) { + const DexFile& dex_file = load_class->GetDexFile(); + dex::TypeIndex type_index = load_class->GetTypeIndex(); + ArenaDeque<PcRelativePatchInfo>* patches = nullptr; + switch (load_class->GetLoadKind()) { + case HLoadClass::LoadKind::kBssEntry: + patches = &type_bss_entry_patches_; + break; + case HLoadClass::LoadKind::kBssEntryPublic: + patches = &public_type_bss_entry_patches_; + break; + case HLoadClass::LoadKind::kBssEntryPackage: + patches = &package_type_bss_entry_patches_; + break; + default: + LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind(); + UNREACHABLE(); + } + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, patches); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageStringPatch( + const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &boot_image_string_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewStringBssEntryPatch( + const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewPcRelativePatch( + const DexFile* dex_file, + uint32_t offset_or_index, + const PcRelativePatchInfo* info_high, + ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index, info_high); + return &patches->back(); +} + +Literal* 
CodeGeneratorRISCV64::DeduplicateUint32Literal(uint32_t value) { + return uint32_literals_.GetOrCreate(value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateUint64Literal(uint64_t value) { + return uint64_literals_.GetOrCreate(value, + [this, value]() { return __ NewLiteral<uint64_t>(value); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateBootImageAddressLiteral(uint64_t address) { + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); +} + +Literal* CodeGeneratorRISCV64::DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle) { + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle) { + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); +} + +void CodeGeneratorRISCV64::PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + const Literal* literal, + uint64_t index_in_table) const { + uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel()); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void CodeGeneratorRISCV64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + for (const auto& entry : jit_string_patches_) { + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + uint64_t index_in_table = GetJitStringRootIndex(string_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } + for (const auto& entry : jit_class_patches_) { + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + uint64_t index_in_table = GetJitClassRootIndex(type_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } +} + +void CodeGeneratorRISCV64::EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, + XRegister out) { + DCHECK(info_high->pc_insn_label == &info_high->label); + __ Bind(&info_high->label); + __ Auipc(out, /*imm20=*/ kLinkTimeOffsetPlaceholderHigh); +} + +void CodeGeneratorRISCV64::EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Addi(rd, rs1, /*imm12=*/ kLinkTimeOffsetPlaceholderLow); +} + +void CodeGeneratorRISCV64::EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Lwu(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow); +} + +void CodeGeneratorRISCV64::EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Ld(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow); +} + +template <linker::LinkerPatch 
(*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorRISCV64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<linker::LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + linker_patches->push_back(Factory(__ GetLabelLocation(&info.label), + info.target_dex_file, + __ GetLabelLocation(info.pc_insn_label), + info.offset_or_index)); + } +} + +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + +void CodeGeneratorRISCV64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + + boot_image_type_patches_.size() + + type_bss_entry_patches_.size() + + public_type_bss_entry_patches_.size() + + package_type_bss_entry_patches_.size() + + boot_image_string_patches_.size() + + string_bss_entry_patches_.size() + + boot_image_jni_entrypoint_patches_.size() + + boot_image_other_patches_.size(); + linker_patches->reserve(size); + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); + } else { + DCHECK(boot_image_method_patches_.empty()); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>( + public_type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>( + package_type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>( + boot_image_jni_entrypoint_patches_, linker_patches); + DCHECK_EQ(size, linker_patches->size()); +} + +void CodeGeneratorRISCV64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) { + switch (load_kind) { + case MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + NewBootImageMethodPatch(invoke->GetResolvedMethodReference()); + 
EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), info_high); + EmitPcRelativeAddiPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitPcRelativeLwuPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(invoke->GetMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + PcRelativePatchInfo* info_low = + NewMethodBssEntryPatch(invoke->GetMethodReference(), info_high); + EmitPcRelativeLdPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kJitDirectAddress: { + __ LoadConst64(temp.AsRegister<XRegister>(), + reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())); + break; + } + case MethodLoadKind::kRuntimeCall: { + // Test situation, don't do anything. + break; + } + default: { + LOG(FATAL) << "Load kind should have already been handled " << load_kind; + UNREACHABLE(); + } + } +} + +void CodeGeneratorRISCV64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp, + SlowPathCode* slow_path) { + // All registers are assumed to be correctly set up per the calling convention. + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. + + switch (invoke->GetMethodLoadKind()) { + case MethodLoadKind::kStringInit: { + // temp = thread->string_init_entrypoint + uint32_t offset = + GetThreadOffset<kRiscv64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + __ Loadd(temp.AsRegister<XRegister>(), TR, offset); + break; + } + case MethodLoadKind::kRecursive: + callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex()); + break; + case MethodLoadKind::kRuntimeCall: + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. + case MethodLoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { + // Do not materialize the method pointer, load directly the entrypoint. 
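        // The entrypoint is loaded into RA via a patched auipc + ld pair below, so the
        // ArtMethod* itself is never materialized for this load kind; the kCallCriticalNative
        // case further down then finds the entrypoint already in RA.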
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, RA); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), info_high); + EmitPcRelativeLdPlaceholder(info_low, RA, RA); + break; + } + FALLTHROUGH_INTENDED; + default: + LoadMethod(invoke->GetMethodLoadKind(), temp, invoke); + break; + } + + switch (invoke->GetCodePtrLocation()) { + case CodePtrLocation::kCallSelf: + DCHECK(!GetGraph()->HasShouldDeoptimizeFlag()); + __ Jal(&frame_entry_label_); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + break; + case CodePtrLocation::kCallArtMethod: + // RA = callee_method->entry_point_from_quick_compiled_code_; + __ Loadd(RA, + callee_method.AsRegister<XRegister>(), + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize).Int32Value()); + // RA() + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + break; + case CodePtrLocation::kCallCriticalNative: { + size_t out_frame_size = + PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorRiscv64, + kNativeStackAlignment, + GetCriticalNativeDirectCallFrameSize>(invoke); + if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) { + // Entrypoint is already loaded in RA. + } else { + // RA = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni + MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kRiscv64PointerSize); + __ Loadd(RA, callee_method.AsRegister<XRegister>(), offset.Int32Value()); + } + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + // The result is returned the same way in native ABI and managed ABI. No result conversion is + // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`. + if (out_frame_size != 0u) { + DecreaseFrame(out_frame_size); + } + break; + } + } + + DCHECK(!IsLeafMethod()); +} + +void CodeGeneratorRISCV64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, + XRegister klass) { + // We know the destination of an intrinsic, so no need to record inline caches. + if (!instruction->GetLocations()->Intrinsified() && + GetGraph()->IsCompilingBaseline() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); + uint64_t address = reinterpret_cast64<uint64_t>(cache); + Riscv64Label done; + // The `art_quick_update_inline_cache` expects the inline cache in T5. + XRegister ic_reg = T5; + ScratchRegisterScope srs(GetAssembler()); + DCHECK_EQ(srs.AvailableXRegisters(), 2u); + srs.ExcludeXRegister(ic_reg); + DCHECK_EQ(srs.AvailableXRegisters(), 1u); + __ LoadConst64(ic_reg, address); + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister tmp = srs2.AllocateXRegister(); + __ Loadd(tmp, ic_reg, InlineCache::ClassesOffset().Int32Value()); + // Fast path for a monomorphic cache. + __ Beq(klass, tmp, &done); + } + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } +} + +void CodeGeneratorRISCV64::GenerateVirtualCall(HInvokeVirtual* invoke, + Location temp_location, + SlowPathCode* slow_path) { + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. 
In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + XRegister receiver = calling_convention.GetRegisterAt(0); + XRegister temp = temp_location.AsRegister<XRegister>(); + MemberOffset method_offset = + mirror::Class::EmbeddedVTableEntryOffset(invoke->GetVTableIndex(), kRiscv64PointerSize); + MemberOffset class_offset = mirror::Object::ClassOffset(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize); + + // temp = object->GetClass(); + __ Loadwu(temp, receiver, class_offset.Int32Value()); + MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + MaybeUnpoisonHeapReference(temp); + + // If we're compiling baseline, update the inline cache. + MaybeGenerateInlineCacheCheck(invoke, temp); + + // temp = temp->GetMethodAt(method_offset); + __ Loadd(temp, temp, method_offset.Int32Value()); + // RA = temp->GetEntryPoint(); + __ Loadd(RA, temp, entry_point.Int32Value()); + // RA(); + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); +} + +void CodeGeneratorRISCV64::MoveFromReturnRegister(Location trg, DataType::Type type) { + if (!trg.IsValid()) { + DCHECK_EQ(type, DataType::Type::kVoid); + return; + } + + DCHECK_NE(type, DataType::Type::kVoid); + + if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { + XRegister trg_reg = trg.AsRegister<XRegister>(); + XRegister res_reg = Riscv64ReturnLocation(type).AsRegister<XRegister>(); + if (trg_reg != res_reg) { + __ Mv(trg_reg, res_reg); + } + } else { + FRegister trg_reg = trg.AsFpuRegister<FRegister>(); + FRegister res_reg = Riscv64ReturnLocation(type).AsFpuRegister<FRegister>(); + if (trg_reg != res_reg) { + __ FMvD(trg_reg, res_reg); // 64-bit move is OK also for `float`. + } + } +} + +void CodeGeneratorRISCV64::PoisonHeapReference(XRegister reg) { + __ Sub(reg, Zero, reg); // Negate the ref. + __ ZextW(reg, reg); // Zero-extend the 32-bit ref. +} + +void CodeGeneratorRISCV64::UnpoisonHeapReference(XRegister reg) { + __ Sub(reg, Zero, reg); // Negate the ref. + __ ZextW(reg, reg); // Zero-extend the 32-bit ref. 
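  // Note: this is intentionally the same sequence as PoisonHeapReference() above; 32-bit
  // negation is its own inverse, so poisoning and unpoisoning are symmetric.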
+} + +inline void CodeGeneratorRISCV64::MaybePoisonHeapReference(XRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } +} + +inline void CodeGeneratorRISCV64::MaybeUnpoisonHeapReference(XRegister reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } +} + +void CodeGeneratorRISCV64::SwapLocations(Location loc1, Location loc2, DataType::Type type) { + DCHECK(!loc1.IsConstant()); + DCHECK(!loc2.IsConstant()); + + if (loc1.Equals(loc2)) { + return; + } + + bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); + bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); + bool is_simd1 = loc1.IsSIMDStackSlot(); + bool is_simd2 = loc2.IsSIMDStackSlot(); + bool is_fp_reg1 = loc1.IsFpuRegister(); + bool is_fp_reg2 = loc2.IsFpuRegister(); + + if ((is_slot1 != is_slot2) || + (loc2.IsRegister() && loc1.IsRegister()) || + (is_fp_reg2 && is_fp_reg1)) { + if ((is_fp_reg2 && is_fp_reg1) && GetGraph()->HasSIMD()) { + LOG(FATAL) << "Unsupported"; + UNREACHABLE(); + } + ScratchRegisterScope srs(GetAssembler()); + Location tmp = (is_fp_reg2 || is_fp_reg1) + ? Location::FpuRegisterLocation(srs.AllocateFRegister()) + : Location::RegisterLocation(srs.AllocateXRegister()); + MoveLocation(tmp, loc1, type); + MoveLocation(loc1, loc2, type); + MoveLocation(loc2, tmp, type); + } else if (is_slot1 && is_slot2) { + move_resolver_.Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), loc1.IsDoubleStackSlot()); + } else if (is_simd1 && is_simd2) { + // TODO(riscv64): Add VECTOR/SIMD later. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) { + // TODO(riscv64): Add VECTOR/SIMD later. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + } else { + LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2; + } +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h index 405b39aa0a..375cec957f 100644 --- a/compiler/optimizing/code_generator_riscv64.h +++ b/compiler/optimizing/code_generator_riscv64.h @@ -17,7 +17,888 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ +#include "android-base/logging.h" +#include "arch/riscv64/registers_riscv64.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" +#include "intrinsics_list.h" +#include "optimizing/locations.h" +#include "parallel_move_resolver.h" +#include "utils/riscv64/assembler_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +// InvokeDexCallingConvention registers +static constexpr XRegister kParameterCoreRegisters[] = {A1, A2, A3, A4, A5, A6, A7}; +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); + +static constexpr FRegister kParameterFpuRegisters[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; +static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); + +// InvokeRuntimeCallingConvention registers +static constexpr XRegister kRuntimeParameterCoreRegisters[] = {A0, A1, A2, A3, A4, A5, A6, A7}; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +static constexpr FRegister kRuntimeParameterFpuRegisters[] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 +}; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + 
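// The V(...) list below is an X-macro: each consumer passes a one-argument macro as V and
// the list stamps out one expansion per intrinsic name. A minimal, self-contained sketch of
// the idiom, using hypothetical DEMO_* names rather than the actual ART consumer macros:

#define DEMO_INTRINSIC_LIST(V) \
  V(MathCos)                   \
  V(MathSin)

// Declares one empty marker function per listed intrinsic, e.g. DemoMathCosIsUnimplemented().
#define DEMO_DECLARE_MARKER(Name) inline void Demo##Name##IsUnimplemented() {}
DEMO_INTRINSIC_LIST(DEMO_DECLARE_MARKER)
#undef DEMO_DECLARE_MARKER
#undef DEMO_INTRINSIC_LIST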
+#define UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(V) \ + V(IntegerReverse) \ + V(IntegerDivideUnsigned) \ + V(LongReverse) \ + V(LongDivideUnsigned) \ + V(MathFmaDouble) \ + V(MathFmaFloat) \ + V(MathCos) \ + V(MathSin) \ + V(MathAcos) \ + V(MathAsin) \ + V(MathAtan) \ + V(MathAtan2) \ + V(MathPow) \ + V(MathCbrt) \ + V(MathCosh) \ + V(MathExp) \ + V(MathExpm1) \ + V(MathHypot) \ + V(MathLog) \ + V(MathLog10) \ + V(MathNextAfter) \ + V(MathSinh) \ + V(MathTan) \ + V(MathTanh) \ + V(MathSqrt) \ + V(MathCeil) \ + V(MathFloor) \ + V(MathRint) \ + V(MathRoundDouble) \ + V(MathRoundFloat) \ + V(MathMultiplyHigh) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyChar) \ + V(SystemArrayCopyInt) \ + V(SystemArrayCopy) \ + V(ThreadCurrentThread) \ + V(FP16Ceil) \ + V(FP16Compare) \ + V(FP16Floor) \ + V(FP16Rint) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Min) \ + V(FP16Max) \ + V(StringCompareTo) \ + V(StringEquals) \ + V(StringGetCharsNoCheck) \ + V(StringIndexOf) \ + V(StringIndexOfAfter) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringNewStringFromBytes) \ + V(StringNewStringFromChars) \ + V(StringNewStringFromString) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(UnsafeCASInt) \ + V(UnsafeCASLong) \ + V(UnsafeCASObject) \ + V(UnsafeGet) \ + V(UnsafeGetVolatile) \ + V(UnsafeGetObject) \ + V(UnsafeGetObjectVolatile) \ + V(UnsafeGetLong) \ + V(UnsafeGetLongVolatile) \ + V(UnsafePut) \ + V(UnsafePutOrdered) \ + V(UnsafePutVolatile) \ + V(UnsafePutObject) \ + V(UnsafePutObjectOrdered) \ + V(UnsafePutObjectVolatile) \ + V(UnsafePutLong) \ + V(UnsafePutLongOrdered) \ + V(UnsafePutLongVolatile) \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(JdkUnsafeCASInt) \ + V(JdkUnsafeCASLong) \ + V(JdkUnsafeCASObject) \ + V(JdkUnsafeCompareAndSetInt) \ + V(JdkUnsafeCompareAndSetLong) \ + V(JdkUnsafeCompareAndSetObject) \ + V(JdkUnsafeGet) \ + V(JdkUnsafeGetVolatile) \ + V(JdkUnsafeGetAcquire) \ + V(JdkUnsafeGetObject) \ + V(JdkUnsafeGetObjectVolatile) \ + V(JdkUnsafeGetObjectAcquire) \ + V(JdkUnsafeGetLong) \ + V(JdkUnsafeGetLongVolatile) \ + V(JdkUnsafeGetLongAcquire) \ + V(JdkUnsafePut) \ + V(JdkUnsafePutOrdered) \ + V(JdkUnsafePutRelease) \ + V(JdkUnsafePutVolatile) \ + V(JdkUnsafePutObject) \ + V(JdkUnsafePutObjectOrdered) \ + V(JdkUnsafePutObjectVolatile) \ + V(JdkUnsafePutObjectRelease) \ + V(JdkUnsafePutLong) \ + V(JdkUnsafePutLongOrdered) \ + V(JdkUnsafePutLongVolatile) \ + V(JdkUnsafePutLongRelease) \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) \ + V(ReferenceGetReferent) \ + V(ReferenceRefersTo) \ + V(IntegerValueOf) \ + V(ThreadInterrupted) \ + V(ReachabilityFence) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + V(VarHandleCompareAndExchange) \ + V(VarHandleCompareAndExchangeAcquire) \ + 
V(VarHandleCompareAndExchangeRelease) \ + V(VarHandleCompareAndSet) \ + V(VarHandleGet) \ + V(VarHandleGetAcquire) \ + V(VarHandleGetAndAdd) \ + V(VarHandleGetAndAddAcquire) \ + V(VarHandleGetAndAddRelease) \ + V(VarHandleGetAndBitwiseAnd) \ + V(VarHandleGetAndBitwiseAndAcquire) \ + V(VarHandleGetAndBitwiseAndRelease) \ + V(VarHandleGetAndBitwiseOr) \ + V(VarHandleGetAndBitwiseOrAcquire) \ + V(VarHandleGetAndBitwiseOrRelease) \ + V(VarHandleGetAndBitwiseXor) \ + V(VarHandleGetAndBitwiseXorAcquire) \ + V(VarHandleGetAndBitwiseXorRelease) \ + V(VarHandleGetAndSet) \ + V(VarHandleGetAndSetAcquire) \ + V(VarHandleGetAndSetRelease) \ + V(VarHandleGetOpaque) \ + V(VarHandleGetVolatile) \ + V(VarHandleSet) \ + V(VarHandleSetOpaque) \ + V(VarHandleSetRelease) \ + V(VarHandleSetVolatile) \ + V(VarHandleWeakCompareAndSet) \ + V(VarHandleWeakCompareAndSetAcquire) \ + V(VarHandleWeakCompareAndSetPlain) \ + V(VarHandleWeakCompareAndSetRelease) + +// Method register on invoke. +static const XRegister kArtMethodRegister = A0; + +class CodeGeneratorRISCV64; + +class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength, + kRiscv64PointerSize) {} + + Location GetReturnLocation(DataType::Type return_type); + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +class InvokeDexCallingConvention : public CallingConvention<XRegister, FRegister> { + public: + InvokeDexCallingConvention() + : CallingConvention(kParameterCoreRegisters, + kParameterCoreRegistersLength, + kParameterFpuRegisters, + kParameterFpuRegistersLength, + kRiscv64PointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); +}; + +class InvokeDexCallingConventionVisitorRISCV64 : public InvokeDexCallingConventionVisitor { + public: + InvokeDexCallingConventionVisitorRISCV64() {} + virtual ~InvokeDexCallingConventionVisitorRISCV64() {} + + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; + + private: + InvokeDexCallingConvention calling_convention; + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorRISCV64); +}; + +class CriticalNativeCallingConventionVisitorRiscv64 : public InvokeDexCallingConventionVisitor { + public: + explicit CriticalNativeCallingConventionVisitorRiscv64(bool for_register_allocation) + : for_register_allocation_(for_register_allocation) {} + + virtual ~CriticalNativeCallingConventionVisitorRiscv64() {} + + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; + + size_t GetStackOffset() const { return stack_offset_; } + + private: + // Register allocator does not support adjusting frame size, so we cannot provide final locations + // of stack arguments for register allocation. We ask the register allocator for any location and + // move these arguments to the right place after adjusting the SP when generating the call. 
+ const bool for_register_allocation_; + size_t gpr_index_ = 0u; + size_t fpr_index_ = 0u; + size_t stack_offset_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorRiscv64); +}; + +class SlowPathCodeRISCV64 : public SlowPathCode { + public: + explicit SlowPathCodeRISCV64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} + + Riscv64Label* GetEntryLabel() { return &entry_label_; } + Riscv64Label* GetExitLabel() { return &exit_label_; } + + private: + Riscv64Label entry_label_; + Riscv64Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeRISCV64); +}; + +class ParallelMoveResolverRISCV64 : public ParallelMoveResolverWithSwap { + public: + ParallelMoveResolverRISCV64(ArenaAllocator* allocator, CodeGeneratorRISCV64* codegen) + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} + + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; + + void Exchange(int index1, int index2, bool double_slot); + + Riscv64Assembler* GetAssembler() const; + + private: + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverRISCV64); +}; + +class FieldAccessCallingConventionRISCV64 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionRISCV64() {} + + Location GetObjectLocation() const override { + return Location::RegisterLocation(A1); + } + Location GetFieldIndexLocation() const override { + return Location::RegisterLocation(A0); + } + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + return Location::RegisterLocation(A0); + } + Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, + bool is_instance) const override { + return is_instance + ? 
Location::RegisterLocation(A2) + : Location::RegisterLocation(A1); + } + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + return Location::FpuRegisterLocation(FA0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionRISCV64); +}; + +class LocationsBuilderRISCV64 : public HGraphVisitor { + public: + LocationsBuilderRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen) + : HGraphVisitor(graph), codegen_(codegen) {} + +#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override; + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) override { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " + << instruction->GetId() << ")"; + } + + protected: + void HandleInvoke(HInvoke* invoke); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction); + void HandleFieldGet(HInstruction* instruction); + + InvokeDexCallingConventionVisitorRISCV64 parameter_visitor_; + + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(LocationsBuilderRISCV64); +}; + +class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator { + public: + InstructionCodeGeneratorRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen); + +#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override; + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) override { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " + << instruction->GetId() << ")"; + } + + Riscv64Assembler* GetAssembler() const { return assembler_; } + + void GenerateMemoryBarrier(MemBarrierKind kind); + + void ShNAdd(XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type); + + protected: + void GenerateClassInitializationCheck(SlowPathCodeRISCV64* slow_path, XRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, XRegister temp); + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). 
+ // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low = nullptr); + + void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + Riscv64Label* true_target, + Riscv64Label* false_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCondition(IfCondition cond, LocationSummary* locations); + void GenerateIntLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + Riscv64Label* label); + void GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label = nullptr); + void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block); + void GenTableBasedPackedSwitch(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block); + int32_t VecAddress(LocationSummary* locations, + size_t size, + /*out*/ XRegister* adjusted_base); + void GenConditionalMove(HSelect* select); + + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)> + void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> + void FpUnOp(Reg rd, FRegister rs1, DataType::Type type); + void FAbs(FRegister rd, FRegister rs1, DataType::Type type); + void FNeg(FRegister rd, FRegister rs1, DataType::Type type); + void FMv(FRegister rd, FRegister rs1, DataType::Type type); + void FClass(XRegister rd, FRegister rs1, DataType::Type type); + + void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type); + void Store(Location value, XRegister rs1, int32_t offset, DataType::Type type); + + Riscv64Assembler* const assembler_; + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorRISCV64); +}; + +class 
CodeGeneratorRISCV64 : public CodeGenerator { + public: + CodeGeneratorRISCV64(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats = nullptr); + virtual ~CodeGeneratorRISCV64() {} + + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + + void Bind(HBasicBlock* block) override; + + size_t GetWordSize() const override { + // The "word" for the compiler is the core register size (64-bit for riscv64) while the + // riscv64 assembler uses "word" for 32-bit values and "double word" for 64-bit values. + return kRiscv64DoublewordSize; + } + + bool SupportsPredicatedSIMD() const override { + // TODO(riscv64): Check the vector extension. + return false; + } + + // Get FP register width in bytes for spilling/restoring in the slow paths. + // + // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers + // alias and live SIMD registers are forced to be spilled in full size in the slow paths. + size_t GetSlowPathFPWidth() const override { + // Default implementation. + return GetCalleePreservedFPWidth(); + } + + size_t GetCalleePreservedFPWidth() const override { + return kRiscv64FloatRegSizeInBytes; + }; + + size_t GetSIMDRegisterWidth() const override { + // TODO(riscv64): Implement SIMD with the Vector extension. + // Note: HLoopOptimization calls this function even for an ISA without SIMD support. + return kRiscv64FloatRegSizeInBytes; + }; + + uintptr_t GetAddressOf(HBasicBlock* block) override { + return assembler_.GetLabelLocation(GetLabelOf(block)); + }; + + Riscv64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Riscv64Label>(block_labels_, block); + } + + void Initialize() override { block_labels_ = CommonInitializeLabels<Riscv64Label>(); } + + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location destination, Location source, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; + + Riscv64Assembler* GetAssembler() override { return &assembler_; } + const Riscv64Assembler& GetAssembler() const override { return assembler_; } + + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, XRegister klass); + + void SetupBlockedRegisters() const override; + + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; + + InstructionSet GetInstructionSet() const override { return InstructionSet::kRiscv64; } + + uint32_t GetPreferredSlotsAlignment() const override { + return static_cast<uint32_t>(kRiscv64PointerSize); + } + + void Finalize() override; + + // Generate code to invoke a runtime entry point. + void InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr) override; + + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. 
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } + + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } + + void IncreaseFrame(size_t adjustment) override; + void DecreaseFrame(size_t adjustment) override; + + void GenerateNop() override; + + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; + + // Check if the desired_string_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadString::LoadKind GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) override; + + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) override; + + // Check if the desired_dispatch_info is supported. If it is, return it, + // otherwise return a fall-back info that should be used instead. + HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) override; + + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // + // The 20-bit and 12-bit parts of the 32-bit PC-relative offset are patched separately, + // necessitating two patches/infos. There can be more than two patches/infos if the + // instruction supplying the high part is shared with e.g. a slow path, while the low + // part is supplied by separate instructions, e.g.: + // auipc r1, high // patch + // lwu r2, low(r1) // patch + // beqz r2, slow_path + // back: + // ... + // slow_path: + // ... + // sw r2, low(r1) // patch + // j back + struct PcRelativePatchInfo : PatchInfo<Riscv64Label> { + PcRelativePatchInfo(const DexFile* dex_file, + uint32_t off_or_idx, + const PcRelativePatchInfo* info_high) + : PatchInfo<Riscv64Label>(dex_file, off_or_idx), + pc_insn_label(info_high != nullptr ? &info_high->label : &label) { + DCHECK_IMPLIES(info_high != nullptr, info_high->pc_insn_label == &info_high->label); + } + + // Pointer to the info for the high part patch or nullptr if this is the high part patch info. 
+ const Riscv64Label* pc_insn_label; + + private: + PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete; + DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); + }; + + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageJniEntrypointPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); + + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewTypeBssEntryPatch(HLoadClass* load_class, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + + void EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, XRegister out); + void EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + void EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + void EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + + Literal* DeduplicateBootImageAddressLiteral(uint64_t address); + void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + const Literal* literal, + uint64_t index_in_table) const; + Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle); + Literal* DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle); + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + + void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp, + SlowPathCode* slow_path = nullptr) override; + void GenerateVirtualCall(HInvokeVirtual* invoke, + Location temp, + SlowPathCode* slow_path = nullptr) override; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; + + void GenerateMemoryBarrier(MemBarrierKind kind); + + void MaybeIncrementHotness(bool is_frame_entry); + + bool CanUseImplicitSuspendCheck() const; + + + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. 
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + + void MarkGCCard(XRegister object, XRegister value, bool value_can_be_null); + + // + // Heap poisoning. + // + + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(XRegister reg); + + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(XRegister reg); + + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(XRegister reg); + + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
+ void MaybeUnpoisonHeapReference(XRegister reg); + + void SwapLocations(Location loc1, Location loc2, DataType::Type type); + + private: + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; + using StringToLiteralMap = + ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, Literal*, TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value); + Literal* DeduplicateUint64Literal(uint64_t value); + + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, + uint32_t offset_or_index, + const PcRelativePatchInfo* info_high, + ArenaDeque<PcRelativePatchInfo>* patches); + + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<linker::LinkerPatch>* linker_patches); + + Riscv64Assembler assembler_; + LocationsBuilderRISCV64 location_builder_; + InstructionCodeGeneratorRISCV64 instruction_visitor_; + Riscv64Label frame_entry_label_; + + // Labels for each block that will be compiled. + Riscv64Label* block_labels_; // Indexed by block id. + + ParallelMoveResolverRISCV64 move_resolver_; + + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for non-patchable method address or method code + // address. + Uint64ToLiteralMap uint64_literals_; + + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; + // PC-relative type patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; + // PC-relative type patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative public type patch info for kBssEntryPublic. + ArenaDeque<PcRelativePatchInfo> public_type_bss_entry_patches_; + // PC-relative package type patch info for kBssEntryPackage. + ArenaDeque<PcRelativePatchInfo> package_type_bss_entry_patches_; + // PC-relative String patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative. + ArenaDeque<PcRelativePatchInfo> boot_image_jni_entrypoint_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_; + + // Patches for string root accesses in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class root accesses in JIT compiled code. 
+ TypeToLiteralMap jit_class_patches_; +}; + +} // namespace riscv64 +} // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc index 6b6e25cf0c..848b5e7567 100644 --- a/compiler/optimizing/code_generator_vector_arm64_neon.cc +++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc @@ -61,10 +61,8 @@ inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* // - constant location - if 'constant' is an actual constant and its value can be // encoded into the instruction. // - register location otherwise. -inline Location NEONEncodableConstantOrRegister(HInstruction* constant, - HInstruction* instr) { - if (constant->IsConstant() - && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { +inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { + if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } @@ -1533,12 +1531,32 @@ void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruc UNREACHABLE(); } -void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc index fe15791d3f..ef79932899 100644 --- a/compiler/optimizing/code_generator_vector_arm64_sve.cc +++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc @@ -62,8 +62,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i // encoded into the instruction. // - register location otherwise. 
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { - if (constant->IsConstant() - && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { + if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } @@ -246,7 +245,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const VRegister dst = DRegisterFrom(locations->Out()); - const PRegister p_reg = LoopPReg(); + const PRegister p_reg = GetVecGoverningPReg(instruction); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kInt32: @@ -284,7 +283,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); DataType::Type from = instruction->GetInputType(); DataType::Type to = instruction->GetResultType(); ValidateVectorLength(instruction); @@ -304,7 +303,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -342,7 +341,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kInt8: @@ -378,7 +377,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: // special case boolean-not @@ -438,7 +437,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -497,7 +496,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = 
ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -546,7 +545,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -585,7 +584,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); // Note: VIXL guarantees StrictNaNPropagation for Fdiv. @@ -633,7 +632,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -678,7 +677,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -714,7 +713,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -769,7 +768,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -802,7 +801,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM 
p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -835,7 +834,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -966,7 +965,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate( const ZRegister acc = ZRegisterFrom(locations->InAt(0)); const ZRegister left = ZRegisterFrom(locations->InAt(1)); const ZRegister right = ZRegisterFrom(locations->InAt(2)); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); DCHECK(locations->InAt(0).Equals(locations->Out())); ValidateVectorLength(instruction); @@ -1029,7 +1028,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) const ZRegister acc = ZRegisterFrom(locations->InAt(0)); const ZRegister left = ZRegisterFrom(locations->InAt(1)); const ZRegister right = ZRegisterFrom(locations->InAt(2)); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), @@ -1099,7 +1098,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) { const ZRegister reg = ZRegisterFrom(locations->Out()); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; - const PRegisterZ p_reg = LoopPReg().Zeroing(); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -1141,7 +1140,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) { const ZRegister reg = ZRegisterFrom(locations->InAt(2)); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; - const PRegisterZ p_reg = LoopPReg().Zeroing(); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -1182,25 +1181,25 @@ void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) { void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) { // Instruction is not predicated, see nodes_vector.h DCHECK(!instruction->IsPredicated()); - const PRegister p_reg = LoopPReg(); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kUint16: case DataType::Type::kInt16: - __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kInt32: case DataType::Type::kFloat32: - __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL); + __ 
Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kInt64: case DataType::Type::kFloat64: - __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1208,6 +1207,67 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instru } } +void LocationsBuilderARM64Sve::VisitVecCondition(HVecCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64Sve::VisitVecCondition(HVecCondition* instruction) { + DCHECK(instruction->IsPredicated()); + LocationSummary* locations = instruction->GetLocations(); + const ZRegister left = ZRegisterFrom(locations->InAt(0)); + const ZRegister right = ZRegisterFrom(locations->InAt(1)); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); + + HVecOperation* a = instruction->InputAt(0)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(1)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + ValidateVectorLength(instruction); + + // TODO: Support other condition OPs and types. + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + __ Cmpeq(output_p_reg.VnB(), p_reg, left.VnB(), right.VnB()); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + __ Cmpeq(output_p_reg.VnH(), p_reg, left.VnH(), right.VnH()); + break; + case DataType::Type::kInt32: + __ Cmpeq(output_p_reg.VnS(), p_reg, left.VnS(), right.VnS()); + break; + case DataType::Type::kInt64: + __ Cmpeq(output_p_reg.VnD(), p_reg, left.VnD(), right.VnD()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->InputAt(0)->IsVecPredSetOperation()); + locations->SetInAt(0, Location::NoLocation()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) { + DCHECK(instruction->IsPredicated()); + + const PRegister input_p_reg = GetVecPredSetFixedOutPReg( + instruction->InputAt(0)->AsVecPredSetOperation()); + const PRegister control_p_reg = GetVecGoverningPReg(instruction); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); + + __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB()); +} + void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); @@ -1218,8 +1278,8 @@ void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) { // Semantically, the out location of this instruction and predicate inputs locations of // its users should be a fixed predicate register (similar to // Location::RegisterLocation(int reg)). 
But the register allocator (RA) doesn't support - // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it - // to the RA. + // SIMD regs (e.g. predicate), so fixed registers are used explicitly without exposing it + // to the RA (through GetVecPredSetFixedOutPReg()). // // To make the RA happy Location::NoLocation() was used for all the vector instructions // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation() @@ -1241,21 +1301,22 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO); Register left = InputRegisterAt(instruction, 0); Register right = InputRegisterAt(instruction, 1); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u); switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) { case 1u: - __ Whilelo(LoopPReg().VnB(), left, right); + __ Whilelo(output_p_reg.VnB(), left, right); break; case 2u: - __ Whilelo(LoopPReg().VnH(), left, right); + __ Whilelo(output_p_reg.VnH(), left, right); break; case 4u: - __ Whilelo(LoopPReg().VnS(), left, right); + __ Whilelo(output_p_reg.VnS(), left, right); break; case 8u: - __ Whilelo(LoopPReg().VnD(), left, right); + __ Whilelo(output_p_reg.VnD(), left, right); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1263,20 +1324,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct } } -void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::NoLocation()); // Result of the operation - a boolean value in a core register. locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { // Instruction is not predicated, see nodes_vector.h DCHECK(!instruction->IsPredicated()); Register reg = OutputRegister(instruction); - // Currently VecPredCondition is only used as part of vectorized loop check condition + // Currently VecPredToBoolean is only used as part of vectorized loop check condition // evaluation. 
- DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst); + DCHECK(instruction->GetPCondKind() == HVecPredToBoolean::PCondKind::kNFirst); __ Cset(reg, pl); } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index e8ecf28386..70f22af17b 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -1069,12 +1069,32 @@ void InstructionCodeGeneratorARMVIXL::VisitVecPredWhile(HVecPredWhile* instructi UNREACHABLE(); } -void LocationsBuilderARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARMVIXL::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARMVIXL::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 343a6e1af4..1f9b2578ac 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1401,12 +1401,32 @@ void InstructionCodeGeneratorX86::VisitVecPredWhile(HVecPredWhile* instruction) UNREACHABLE(); } -void LocationsBuilderX86::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorX86::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index fb6e4e753f..47afa3b4a1 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1374,12 +1374,32 @@ void 
InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instructio UNREACHABLE(); } -void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86_64::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86_64::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cb1cecc45a..b8c8d9f73d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -27,6 +27,7 @@ #include "heap_poisoning.h" #include "interpreter/mterp/nterp.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "intrinsics_x86.h" #include "jit/profiling_info.h" @@ -38,6 +39,7 @@ #include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86/assembler_x86.h" @@ -839,7 +841,8 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); @@ -1107,6 +1110,7 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1121,15 +1125,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -1140,8 +1142,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, kNumberOfRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)) + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)) | (1 << kFakeReturnRegister), 0, compiler_options, @@ -1221,12 +1222,18 @@ void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); SetInForReturnValue(method_hook, locations); + // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + // An additional temporary register to hold address to store the timestamp counter. + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) { SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); + LocationSummary* locations = instruction->GetLocations(); if (instruction->IsMethodExitHook()) { // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it @@ -1242,8 +1249,51 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); - __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ cmpb(Address::Absolute(address + offset.Int32Value()), + Immediate(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any trace method entry / exit listeners. If no, continue. + __ j(kLess, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ j(kGreater, slow_path->GetEntryLabel()); + + // For entry_addr use the first temp that isn't EAX or EDX. We need this after + // rdtsc which returns values in EAX + EDX. + Register entry_addr = locations->GetTemp(2).AsRegister<Register>(); + Register index = locations->GetTemp(1).AsRegister<Register>(); + + // Check if there is place in the buffer for a new entry, if no, take slow path. + uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value(); + uint64_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kX86PointerSize>().Int32Value(); + + __ fs()->movl(index, Address::Absolute(trace_buffer_index_offset)); + __ subl(index, Immediate(kNumEntriesForWallClock)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ fs()->movl(Address::Absolute(trace_buffer_index_offset), index); + // Calculate the entry address in the buffer. 
+ // entry_addr = base_addr + sizeof(void*) * index + __ fs()->movl(entry_addr, Address::Absolute(trace_buffer_ptr)); + __ leal(entry_addr, Address(entry_addr, index, TIMES_4, 0)); + + // Record method pointer and trace action. + Register method = index; + __ movl(method, Address(ESP, kCurrentMethodStackOffset)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ movl(Address(entry_addr, kMethodOffsetInBytes), method); + // Get the timestamp. rdtsc returns timestamp in EAX + EDX. + __ rdtsc(); + __ movl(Address(entry_addr, kTimestampOffsetInBytes), EAX); + __ movl(Address(entry_addr, kHighTimestampOffsetInBytes), EDX); __ Bind(slow_path->GetExitLabel()); } @@ -1254,7 +1304,13 @@ void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instructi } void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + // An additional temporary register to hold address to store the timestamp counter. + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) { @@ -1865,8 +1921,7 @@ void LocationsBuilderX86::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {} template<class LabelType> void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, @@ -1981,7 +2036,7 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double) { - HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable(); + HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull(); if (is_double) { if (rhs.IsFpuRegister()) { __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); @@ -2506,7 +2561,7 @@ void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -2516,7 +2571,7 @@ void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. 
} @@ -2526,7 +2581,7 @@ void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -2536,7 +2591,7 @@ void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -2546,7 +2601,7 @@ void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -2555,7 +2610,7 @@ void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_f } void InstructionCodeGeneratorX86::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2571,7 +2626,7 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -2954,10 +3009,10 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { constant_area)); __ xorps(out.AsFpuRegister<XmmRegister>(), mask); } else { - __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), - neg->GetBaseMethodAddress(), - constant_area)); - __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), + neg->GetBaseMethodAddress(), + constant_area)); + __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); } } @@ -5086,8 +5141,7 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { -} + [[maybe_unused]] HParameterValue* instruction) {} void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = @@ -5095,7 +5149,7 @@ void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } -void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) { } void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) { @@ -5294,7 +5348,7 @@ void LocationsBuilderX86::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5323,8 +5377,8 @@ void 
CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { return desired_dispatch_info; } @@ -6749,7 +6803,7 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { } } -void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -7213,9 +7267,8 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE Register out = out_loc.AsRegister<Register>(); bool generate_null_check = false; - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -7445,7 +7498,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); // No need for memory fence, thanks to the x86 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); @@ -7465,14 +7518,13 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_)); @@ -7498,7 +7550,7 @@ void LocationsBuilderX86::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) { __ fs()->movl(GetExceptionTlsAddress(), Immediate(0)); } @@ -7840,7 +7892,6 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. 
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -8028,11 +8079,11 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. @@ -8584,12 +8635,12 @@ void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } @@ -8782,13 +8833,15 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons case DataType::Type::kFloat32: __ movss(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( - value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); + value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); break; case DataType::Type::kFloat64: __ movsd(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( - value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); + value->AsDoubleConstant()->GetValue(), + insn->GetBaseMethodAddress(), + const_area)); break; case DataType::Type::kInt32: @@ -8877,7 +8930,7 @@ class JumpTableRIPFixup : public RIPFixup { const HX86PackedSwitch* switch_instr_; }; -void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { +void CodeGeneratorX86::Finalize() { // Generate the constant area if needed. X86Assembler* assembler = GetAssembler(); @@ -8897,7 +8950,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { } // And finish up. - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); } Address CodeGeneratorX86::LiteralDoubleAddress(double v, @@ -8968,9 +9021,9 @@ Address CodeGeneratorX86::ArrayAddress(Register obj, Location index, ScaleFactor scale, uint32_t data_offset) { - return index.IsConstant() ? - Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : - Address(obj, index.AsRegister<Register>(), scale, data_offset); + return index.IsConstant() + ? 
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) + : Address(obj, index.AsRegister<Register>(), scale, data_offset); } Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, @@ -9025,7 +9078,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = - dchecked_integral_cast<uint32_t>(address); + dchecked_integral_cast<uint32_t>(address); } void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { @@ -9042,13 +9095,13 @@ void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_da } } -void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index d27155f31d..aa25528e08 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -196,7 +196,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { ? Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } @@ -635,7 +635,7 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
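
Aside: the method entry/exit hook changes to code_generator_x86.cc above emit a fast path that appends a record to a per-thread trace buffer. Below is a minimal, self-contained C++ sketch of that record and of the pointer-tagging trick the emitted `orl` relies on. The struct, its field names and WriteTraceEntry() are invented for illustration only; the generated code works with raw offsets (kMethodOffsetInBytes, kTimestampOffsetInBytes, kHighTimestampOffsetInBytes) and the EAX/EDX halves returned by rdtsc rather than with a C++ struct.

#include <cstdint>
#include <cstdio>

// Illustrative sketch, not part of this change.
enum TraceAction : uint32_t {
  kTraceMethodEnter = 0,  // encoded in the low bits of the method pointer
  kTraceMethodExit = 1,
};

struct TraceEntry32 {           // assumed layout of one buffer record on 32-bit x86
  uint32_t method_and_action;   // ArtMethod* (at least 4-byte aligned) | action
  uint32_t timestamp_lo;        // rdtsc low half (EAX)
  uint32_t timestamp_hi;        // rdtsc high half (EDX)
};

void WriteTraceEntry(TraceEntry32* entry, uintptr_t method, TraceAction action,
                     uint32_t tsc_lo, uint32_t tsc_hi) {
  // ArtMethod alignment (>= 4) keeps the two low bits free, so they can carry
  // the enter/exit action; this is what the emitted `orl` does for exit hooks.
  entry->method_and_action = static_cast<uint32_t>(method) | action;
  entry->timestamp_lo = tsc_lo;
  entry->timestamp_hi = tsc_hi;
}

int main() {
  TraceEntry32 e;
  WriteTraceEntry(&e, 0x1000, kTraceMethodExit, 42, 0);
  std::printf("method=%#x action=%u timestamp=%llu\n",
              e.method_and_action & ~3u, e.method_and_action & 3u,
              (static_cast<unsigned long long>(e.timestamp_hi) << 32) | e.timestamp_lo);
  return 0;
}

Splitting the timestamp into two 32-bit stores matches the 32-bit target; the x86-64 variant in the next file folds the two rdtsc halves into a single 64-bit store instead.
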
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index eea6b204fa..f61a1f04c3 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -27,6 +27,7 @@ #include "heap_poisoning.h" #include "interpreter/mterp/nterp.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "intrinsics_x86_64.h" #include "jit/profiling_info.h" @@ -39,6 +40,7 @@ #include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86_64/assembler_x86_64.h" @@ -856,7 +858,8 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); @@ -1070,8 +1073,8 @@ void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* location } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { return desired_dispatch_info; } @@ -1495,6 +1498,7 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1509,15 +1513,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -1531,11 +1533,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, kNumberOfCpuRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)) + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)) | (1 << kFakeReturnRegister), - ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), - arraysize(kFpuCalleeSaves)), + ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)), compiler_options, stats, ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), @@ -1585,12 +1585,18 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { } void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We use rdtsc to record the timestamp for method profiling. rdtsc returns + // two 32-bit values in EAX + EDX even on 64-bit architectures. + locations->AddTemp(Location::RegisterLocation(RAX)); + locations->AddTemp(Location::RegisterLocation(RDX)); } void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) { SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); + LocationSummary* locations = instruction->GetLocations(); codegen_->AddSlowPath(slow_path); if (instruction->IsMethodExitHook()) { @@ -1609,8 +1615,51 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value())); - __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ cmpb(Address(CpuRegister(TMP), 0), + Immediate(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any method entry / exit listeners. If no, continue with execution. + __ j(kLess, slow_path->GetExitLabel()); + // Check if there are any slow method entry / exit listeners. If yes, take the slow path. + __ j(kGreater, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer for a new entry, if no, take slow path. + CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister entry_addr = CpuRegister(TMP); + uint64_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue(); + __ gs()->movq(CpuRegister(index), + Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true)); + __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true), + CpuRegister(index)); + // Calculate the entry address in the buffer. 
+ // entry_addr = base_addr + sizeof(void*) * index + __ gs()->movq(entry_addr, + Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(), + /* no_rip= */ true)); + __ leaq(CpuRegister(entry_addr), + Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0)); + + // Record method pointer and action. + CpuRegister method = index; + __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method)); + // Get the timestamp. rdtsc returns timestamp in RAX + RDX even in 64-bit architectures. + __ rdtsc(); + __ shlq(CpuRegister(RDX), Immediate(32)); + __ orq(CpuRegister(RAX), CpuRegister(RDX)); + __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX)); __ Bind(slow_path->GetExitLabel()); } @@ -1651,6 +1700,10 @@ void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); SetInForReturnValue(method_hook, locations); + // We use rdtsc to record the timestamp for method profiling. rdtsc returns + // two 32-bit values in EAX + EDX even on 64-bit architectures. + locations->AddTemp(Location::RegisterLocation(RAX)); + locations->AddTemp(Location::RegisterLocation(RDX)); } void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) { @@ -1949,8 +2002,9 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); } -void CodeGeneratorX86_64::MoveLocation( - Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { +void CodeGeneratorX86_64::MoveLocation(Location dst, + Location src, + [[maybe_unused]] DataType::Type dst_type) { Move(dst, src); } @@ -2009,8 +2063,7 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {} template<class LabelType> void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, @@ -2051,7 +2104,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } else if (right.IsConstant()) { __ ucomiss(left.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( - right.GetConstant()->AsFloatConstant()->GetValue())); + right.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(right.IsStackSlot()); __ ucomiss(left.AsFpuRegister<XmmRegister>(), @@ -2065,7 +2118,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } else if (right.IsConstant()) { __ ucomisd(left.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( - right.GetConstant()->AsDoubleConstant()->GetValue())); + right.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(right.IsDoubleStackSlot()); __ 
ucomisd(left.AsFpuRegister<XmmRegister>(), @@ -2657,7 +2710,7 @@ void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -2667,7 +2720,7 @@ void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -2677,7 +2730,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -2687,7 +2740,7 @@ void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -2698,7 +2751,7 @@ void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorX86_64::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -2707,7 +2760,7 @@ void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructo } void InstructionCodeGeneratorX86_64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2723,7 +2776,7 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -4972,7 +5025,7 @@ void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86_64::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -4983,7 +5036,7 @@ void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorX86_64::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. 
} @@ -5062,7 +5115,7 @@ void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } @@ -5930,8 +5983,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movsd(address, value.AsFpuRegister<XmmRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - int64_t v = - bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); Address address_high = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); codegen_->MoveInt64ToAddress(address, address_high, v, instruction); @@ -6084,7 +6136,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, } } -void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unimplemented"; } @@ -6471,7 +6523,9 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); @@ -6507,9 +6561,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -6704,7 +6757,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); // No need for memory fence, thanks to the x86-64 memory model. 
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); @@ -6725,14 +6778,13 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. // Custom calling convention: RAX serves as both input and output. __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_)); codegen_->InvokeRuntime(kQuickResolveString, @@ -6760,7 +6812,7 @@ void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) { __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); } @@ -7112,7 +7164,6 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7301,11 +7352,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. @@ -7830,12 +7881,12 @@ void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instructi __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; } @@ -7930,13 +7981,13 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins __ jmp(temp_reg); } -void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } @@ -8037,9 +8088,9 @@ Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, Location index, ScaleFactor scale, uint32_t data_offset) { - return index.IsConstant() ? - Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : - Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); + return index.IsConstant() + ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) + : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); } void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { @@ -8119,7 +8170,7 @@ class JumpTableRIPFixup : public RIPFixup { const HPackedSwitch* switch_instr_; }; -void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { +void CodeGeneratorX86_64::Finalize() { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { @@ -8137,7 +8188,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { } // And finish up. - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); } Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { @@ -8217,7 +8268,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = - dchecked_integral_cast<uint32_t>(address); + dchecked_integral_cast<uint32_t>(address); } void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dff2e799e0..5a940c1466 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -162,16 +162,16 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const override { return Location::RegisterLocation(RDI); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override { return Location::RegisterLocation(RAX); } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) - const override { + Location GetSetValueLocation([[maybe_unused]] DataType::Type type, + bool is_instance) const override { return is_instance ? 
Location::RegisterLocation(RDX) : Location::RegisterLocation(RSI); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } @@ -468,7 +468,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void SetupBlockedRegisters() const override; void DumpCoreRegister(std::ostream& stream, int reg) const override; void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; InstructionSet GetInstructionSet() const override { return InstructionSet::kX86_64; @@ -502,9 +502,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { - return false; - } + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index d759a16f48..33b5bd5169 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -16,6 +16,9 @@ #include "code_sinking.h" +#include <sstream> + +#include "android-base/logging.h" #include "base/arena_bit_vector.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" @@ -335,10 +338,6 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { processed_instructions.ClearAllBits(); ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false); post_dominated.ClearAllBits(); - ArenaBitVector instructions_that_can_move( - &allocator, number_of_instructions, /* expandable= */ false); - instructions_that_can_move.ClearAllBits(); - ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. // TODO(ngeoffray): Getting the full set of post-dominated should be done by @@ -411,6 +410,13 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { HBasicBlock* common_dominator = finder.Get(); // Step (2): iterate over the worklist to find sinking candidates. + ArenaBitVector instructions_that_can_move( + &allocator, number_of_instructions, /* expandable= */ false); + instructions_that_can_move.ClearAllBits(); + ScopedArenaVector<ScopedArenaVector<HInstruction*>> instructions_to_move( + graph_->GetBlocks().size(), + ScopedArenaVector<HInstruction*>(allocator.Adapter(kArenaAllocMisc)), + allocator.Adapter(kArenaAllocMisc)); while (!worklist.empty()) { HInstruction* instruction = worklist.back(); if (processed_instructions.IsBitSet(instruction->GetId())) { @@ -467,7 +473,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { // Instruction is a candidate for being sunk. Mark it as such, remove it from the // work list, and add its inputs to the work list. 
instructions_that_can_move.SetBit(instruction->GetId()); - move_in_order.push_back(instruction); + instructions_to_move[instruction->GetBlock()->GetBlockId()].push_back(instruction); processed_instructions.SetBit(instruction->GetId()); worklist.pop_back(); AddInputs(instruction, processed_instructions, post_dominated, &worklist); @@ -493,14 +499,50 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } } - // Make sure we process instructions in dominated order. This is required for heap - // stores. - std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) { - return b->StrictlyDominates(a); - }); + // We want to process the instructions in reverse dominated order. This is required for heap + // stores. To guarantee this (including the transitivity of incomparability) we have some extra + // bookkeeping. + ScopedArenaVector<HInstruction*> instructions_to_move_sorted(allocator.Adapter(kArenaAllocMisc)); + for (HBasicBlock* block : graph_->GetPostOrder()) { + const int block_id = block->GetBlockId(); + + // Order the block itself first. + std::sort(instructions_to_move[block_id].begin(), + instructions_to_move[block_id].end(), + [&block](HInstruction* a, HInstruction* b) { + return block->GetInstructions().FoundBefore(b, a); + }); + + for (HInstruction* instruction : instructions_to_move[block_id]) { + instructions_to_move_sorted.push_back(instruction); + } + } + + if (kIsDebugBuild) { + // We should have ordered the instructions in reverse dominated order. This means that + // instructions shouldn't dominate instructions that come after it in the vector. + for (size_t i = 0; i < instructions_to_move_sorted.size(); ++i) { + for (size_t j = i + 1; j < instructions_to_move_sorted.size(); ++j) { + if (instructions_to_move_sorted[i]->StrictlyDominates(instructions_to_move_sorted[j])) { + std::stringstream ss; + graph_->Dump(ss, nullptr); + ss << "\n" + << "{"; + for (HInstruction* instr : instructions_to_move_sorted) { + ss << *instr << " in block: " << instr->GetBlock() << ", "; + } + ss << "}\n"; + ss << "i = " << i << " which is " << *instructions_to_move_sorted[i] + << "strictly dominates j = " << j << " which is " << *instructions_to_move_sorted[j] + << "\n"; + LOG(FATAL) << "Unexpected ordering of code sinking instructions: " << ss.str(); + } + } + } + } // Step (3): Try to move sinking candidates. - for (HInstruction* instruction : move_in_order) { + for (HInstruction* instruction : instructions_to_move_sorted) { HInstruction* position = nullptr; if (instruction->IsArraySet() || instruction->IsInstanceFieldSet() diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 2d9acc49b3..c72d3ea24a 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -733,8 +733,7 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), DataType::Type::kInt32, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } #endif @@ -785,8 +784,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } // Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off. 
@@ -798,7 +796,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { codegen.Initialize(); - graph->SetHasSIMD(true); + graph->SetHasTraditionalSIMD(true); for (int i = 0; i < 2; i++) { HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator()); move->AddMove(Location::SIMDStackSlot(0), @@ -818,11 +816,10 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { DataType::Type::kFloat64, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - graph->SetHasSIMD(false); + graph->SetHasTraditionalSIMD(false); } - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } // Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example). @@ -867,7 +864,7 @@ TEST_F(CodegenTest, ARM64FrameSizeSIMD) { arm64::CodeGeneratorARM64 codegen(graph, *compiler_options); codegen.Initialize(); - graph->SetHasSIMD(true); + graph->SetHasTraditionalSIMD(true); DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; @@ -887,7 +884,8 @@ TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) { arm64::CodeGeneratorARM64 codegen(graph, *compiler_options); codegen.Initialize(); - graph->SetHasSIMD(false); + graph->SetHasTraditionalSIMD(false); + graph->SetHasPredicatedSIMD(false); DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 7af9d0f44c..a8425c9915 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -103,8 +103,8 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { blocked_core_registers_[arm::R7] = false; } - void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) override { + void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int code, + [[maybe_unused]] Location temp_loc) override { // When turned on, the marking register checks in // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the // Thread Register and the Marking Register to be set to @@ -135,8 +135,8 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) : arm64::CodeGeneratorARM64(graph, compiler_options) {} - void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) override { + void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int codem, + [[maybe_unused]] Location temp_loc) override { // When turned on, the marking register checks in // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the // Thread Register and the Marking Register to be set to @@ -167,28 +167,6 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { }; #endif -class InternalCodeAllocator : public CodeAllocator { - public: - InternalCodeAllocator() : size_(0) { } - - uint8_t* Allocate(size_t size) override { - size_ = size; - memory_.reset(new uint8_t[size]); - return memory_.get(); - } - - size_t GetSize() const { return size_; } - ArrayRef<const uint8_t> GetMemory() const override { - return ArrayRef<const uint8_t>(memory_.get(), size_); - } - - private: - size_t size_; - std::unique_ptr<uint8_t[]> memory_; - - DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); -}; - static bool CanExecuteOnHardware(InstructionSet target_isa) { return 
(target_isa == kRuntimeISA) // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2). @@ -247,8 +225,7 @@ static void VerifyGeneratedCode(InstructionSet target_isa, } template <typename Expected> -static void Run(const InternalCodeAllocator& allocator, - const CodeGenerator& codegen, +static void Run(const CodeGenerator& codegen, bool has_result, Expected expected) { InstructionSet target_isa = codegen.GetInstructionSet(); @@ -260,7 +237,7 @@ static void Run(const InternalCodeAllocator& allocator, }; CodeHolder code_holder; const void* method_code = - code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa); + code_holder.MakeExecutable(codegen.GetCode(), ArrayRef<const uint8_t>(), target_isa); using fptr = Expected (*)(); fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code)); @@ -294,9 +271,8 @@ static void RunCodeNoCheck(CodeGenerator* codegen, register_allocator->AllocateRegisters(); } hook_before_codegen(graph); - InternalCodeAllocator allocator; - codegen->Compile(&allocator); - Run(allocator, *codegen, has_result, expected); + codegen->Compile(); + Run(*codegen, has_result, expected); } template <typename Expected> diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 20b0e38af5..e2ef8d52f2 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -311,10 +311,8 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* } } -inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, - HInstruction* instr) { - if (constant->IsConstant() - && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { +inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { + if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 06d19e3f29..e20d9e83e6 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -37,6 +37,13 @@ class HConstantFoldingVisitor final : public HGraphDelegateVisitor { void VisitUnaryOperation(HUnaryOperation* inst) override; void VisitBinaryOperation(HBinaryOperation* inst) override; + // Tries to replace constants in binary operations like: + // * BinaryOp(Select(false_constant, true_constant, condition), other_constant), or + // * BinaryOp(other_constant, Select(false_constant, true_constant, condition)) + // with consolidated constants. For example, Add(Select(10, 20, condition), 5) can be replaced + // with Select(15, 25, condition). + bool TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst); + void VisitArrayLength(HArrayLength* inst) override; void VisitDivZeroCheck(HDivZeroCheck* inst) override; void VisitIf(HIf* inst) override; @@ -113,9 +120,69 @@ void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+UnaryOperation cases. We can do this if both + // inputs to the select are constants, and this is the only use of the select. 
+ HSelect* select = inst->InputAt(0)->AsSelect(); + HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue()); + if (false_constant == nullptr) { + return; + } + HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue()); + if (true_constant == nullptr) { + return; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); } } +bool HConstantFoldingVisitor::TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst) { + if (inst->GetLeft()->IsSelect() == inst->GetRight()->IsSelect()) { + // If both of them are constants, VisitBinaryOperation already tried the static evaluation. If + // both of them are selects, then we can't simplify. + // TODO(solanes): Technically, if both of them are selects we could simplify iff both select's + // conditions are equal e.g. Add(Select(1, 2, cond), Select(3, 4, cond)) could be replaced with + // Select(4, 6, cond). This seems very unlikely to happen so we don't implement it. + return false; + } + + const bool left_is_select = inst->GetLeft()->IsSelect(); + HSelect* select = left_is_select ? inst->GetLeft()->AsSelect() : inst->GetRight()->AsSelect(); + HInstruction* maybe_constant = left_is_select ? inst->GetRight() : inst->GetLeft(); + + if (select->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+BinaryOperation. We can do this if both + // inputs to the select are constants, and this is the only use of the select. + HConstant* false_constant = + inst->TryStaticEvaluation(left_is_select ? select->GetFalseValue() : maybe_constant, + left_is_select ? maybe_constant : select->GetFalseValue()); + if (false_constant == nullptr) { + return false; + } + HConstant* true_constant = + inst->TryStaticEvaluation(left_is_select ? select->GetTrueValue() : maybe_constant, + left_is_select ? maybe_constant : select->GetTrueValue()); + if (true_constant == nullptr) { + return false; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); + return true; + } + return false; +} + void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { // Constant folding: replace `op(a, b)' with a constant at // compile time if `a' and `b' are both constants. @@ -123,6 +190,8 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (TryRemoveBinaryOperationViaSelect(inst)) { + // Already replaced inside TryRemoveBinaryOperationViaSelect. } else { InstructionWithAbsorbingInputSimplifier simplifier(GetGraph()); inst->Accept(&simplifier); @@ -299,6 +368,25 @@ void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+TypeConversion. We can do this if both + // inputs to the select are constants, and this is the only use of the select. 
+ HSelect* select = inst->InputAt(0)->AsSelect(); + HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue()); + if (false_constant == nullptr) { + return; + } + HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue()); + if (true_constant == nullptr) { + return; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); } } @@ -583,7 +671,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { block->RemoveInstruction(instruction); } - HConstant* cst_right = instruction->GetRight()->AsConstant(); + HConstant* cst_right = instruction->GetRight()->AsConstantOrNull(); if (((cst_right != nullptr) && (cst_right->IsOne() || cst_right->IsMinusOne())) || (instruction->GetLeft() == instruction->GetRight())) { diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index d9b7652f32..48635cfd15 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -78,7 +78,7 @@ class CFREVisitor final : public HGraphVisitor { VisitSetLocation(instruction, value); } - void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override { + void VisitDeoptimize([[maybe_unused]] HDeoptimize* instruction) override { // Pessimize: Merge all fences. MergeCandidateFences(); } @@ -151,7 +151,7 @@ class CFREVisitor final : public HGraphVisitor { } } - void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) { + void VisitSetLocation([[maybe_unused]] HInstruction* inst, HInstruction* store_input) { // An object is considered "published" if it's stored onto the heap. // Sidenote: A later "LSE" pass can still remove the fence if it proves the // object doesn't actually escape. 
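// Illustration only: a self-contained check of the Select folding rule added in the
// constant_folding.cc hunks above, with plain C++ ternaries standing in for HSelect
// (select_folding_sketch and its helpers are made-up names, not ART code). For either
// value of the condition, Add(Select(10, 20, cond), 5) behaves like Select(15, 25, cond),
// and the same shape holds when a unary operation or type conversion is applied to both arms.
#include <cassert>

namespace select_folding_sketch {

inline int SelectThenAdd(bool cond, int false_val, int true_val, int other) {
  return (cond ? true_val : false_val) + other;            // BinaryOp(Select(...), constant)
}

inline int FoldedSelect(bool cond, int false_val, int true_val, int other) {
  return cond ? (true_val + other) : (false_val + other);  // Select(folded, folded, cond)
}

inline void Check() {
  for (bool cond : {false, true}) {
    assert(SelectThenAdd(cond, 10, 20, 5) == FoldedSelect(cond, 10, 20, 5));
  }
}

}  // namespace select_folding_sketch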
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index cf49e39849..8e6b6db236 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -24,6 +24,7 @@ #include "base/scoped_arena_containers.h" #include "base/stl_util.h" #include "optimizing/nodes.h" +#include "optimizing/nodes_vector.h" #include "ssa_phi_elimination.h" namespace art HIDDEN { @@ -842,7 +843,8 @@ void HDeadCodeElimination::RemoveDeadInstructions() { void HDeadCodeElimination::UpdateGraphFlags() { bool has_monitor_operations = false; - bool has_simd = false; + bool has_traditional_simd = false; + bool has_predicated_simd = false; bool has_bounds_checks = false; bool has_always_throwing_invokes = false; @@ -852,7 +854,12 @@ void HDeadCodeElimination::UpdateGraphFlags() { if (instruction->IsMonitorOperation()) { has_monitor_operations = true; } else if (instruction->IsVecOperation()) { - has_simd = true; + HVecOperation* vec_instruction = instruction->AsVecOperation(); + if (vec_instruction->IsPredicated()) { + has_predicated_simd = true; + } else { + has_traditional_simd = true; + } } else if (instruction->IsBoundsCheck()) { has_bounds_checks = true; } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) { @@ -862,7 +869,8 @@ void HDeadCodeElimination::UpdateGraphFlags() { } graph_->SetHasMonitorOperations(has_monitor_operations); - graph_->SetHasSIMD(has_simd); + graph_->SetHasTraditionalSIMD(has_traditional_simd); + graph_->SetHasPredicatedSIMD(has_predicated_simd); graph_->SetHasBoundsChecks(has_bounds_checks); graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes); } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 190b362145..31ba3fe98a 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -168,52 +168,68 @@ void GraphChecker::CheckGraphFlags() { void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; - // Use local allocator for allocating memory. - ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); - - // Check consistency with respect to predecessors of `block`. - // Note: Counting duplicates with a sorted vector uses up to 6x less memory - // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ScopedArenaVector<HBasicBlock*> sorted_predecessors(allocator.Adapter(kArenaAllocGraphChecker)); - sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); - std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); - for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) { - HBasicBlock* p = *it++; - size_t p_count_in_block_predecessors = 1u; - for (; it != end && *it == p; ++it) { - ++p_count_in_block_predecessors; - } - size_t block_count_in_p_successors = - std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); - if (p_count_in_block_predecessors != block_count_in_p_successors) { - AddError(StringPrintf( - "Block %d lists %zu occurrences of block %d in its predecessors, whereas " - "block %d lists %zu occurrences of block %d in its successors.", - block->GetBlockId(), p_count_in_block_predecessors, p->GetBlockId(), - p->GetBlockId(), block_count_in_p_successors, block->GetBlockId())); - } - } + { + // Use local allocator for allocating memory. We use C++ scopes (i.e. 
`{}`) to reclaim the + // memory as soon as possible, and to end the scope of this `ScopedArenaAllocator`. + ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); - // Check consistency with respect to successors of `block`. - // Note: Counting duplicates with a sorted vector uses up to 6x less memory - // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); - sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); - std::sort(sorted_successors.begin(), sorted_successors.end()); - for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) { - HBasicBlock* s = *it++; - size_t s_count_in_block_successors = 1u; - for (; it != end && *it == s; ++it) { - ++s_count_in_block_successors; + { + // Check consistency with respect to predecessors of `block`. + // Note: Counting duplicates with a sorted vector uses up to 6x less memory + // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. + ScopedArenaVector<HBasicBlock*> sorted_predecessors( + allocator.Adapter(kArenaAllocGraphChecker)); + sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); + std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); + for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end;) { + HBasicBlock* p = *it++; + size_t p_count_in_block_predecessors = 1u; + for (; it != end && *it == p; ++it) { + ++p_count_in_block_predecessors; + } + size_t block_count_in_p_successors = + std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); + if (p_count_in_block_predecessors != block_count_in_p_successors) { + AddError(StringPrintf( + "Block %d lists %zu occurrences of block %d in its predecessors, whereas " + "block %d lists %zu occurrences of block %d in its successors.", + block->GetBlockId(), + p_count_in_block_predecessors, + p->GetBlockId(), + p->GetBlockId(), + block_count_in_p_successors, + block->GetBlockId())); + } + } } - size_t block_count_in_s_predecessors = - std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); - if (s_count_in_block_successors != block_count_in_s_predecessors) { - AddError(StringPrintf( - "Block %d lists %zu occurrences of block %d in its successors, whereas " - "block %d lists %zu occurrences of block %d in its predecessors.", - block->GetBlockId(), s_count_in_block_successors, s->GetBlockId(), - s->GetBlockId(), block_count_in_s_predecessors, block->GetBlockId())); + + { + // Check consistency with respect to successors of `block`. + // Note: Counting duplicates with a sorted vector uses up to 6x less memory + // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. 
+ ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); + sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); + std::sort(sorted_successors.begin(), sorted_successors.end()); + for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end;) { + HBasicBlock* s = *it++; + size_t s_count_in_block_successors = 1u; + for (; it != end && *it == s; ++it) { + ++s_count_in_block_successors; + } + size_t block_count_in_s_predecessors = + std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); + if (s_count_in_block_successors != block_count_in_s_predecessors) { + AddError( + StringPrintf("Block %d lists %zu occurrences of block %d in its successors, whereas " + "block %d lists %zu occurrences of block %d in its predecessors.", + block->GetBlockId(), + s_count_in_block_successors, + s->GetBlockId(), + s->GetBlockId(), + block_count_in_s_predecessors, + block->GetBlockId())); + } + } } } @@ -587,21 +603,38 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } // Ensure 'instruction' has pointers to its inputs' use entries. - auto&& input_records = instruction->GetInputRecords(); - for (size_t i = 0; i < input_records.size(); ++i) { - const HUserRecord<HInstruction*>& input_record = input_records[i]; - HInstruction* input = input_record.GetInstruction(); - if ((input_record.GetBeforeUseNode() == input->GetUses().end()) || - (input_record.GetUseNode() == input->GetUses().end()) || - !input->GetUses().ContainsNode(*input_record.GetUseNode()) || - (input_record.GetUseNode()->GetIndex() != i)) { - AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry " - "at input %u (%s:%d).", - instruction->DebugName(), - instruction->GetId(), - static_cast<unsigned>(i), - input->DebugName(), - input->GetId())); + { + auto&& input_records = instruction->GetInputRecords(); + for (size_t i = 0; i < input_records.size(); ++i) { + const HUserRecord<HInstruction*>& input_record = input_records[i]; + HInstruction* input = input_record.GetInstruction(); + + // Populate bookkeeping, if needed. See comment in graph_checker.h for uses_per_instruction_. 
+ auto it = uses_per_instruction_.find(input->GetId()); + if (it == uses_per_instruction_.end()) { + it = uses_per_instruction_ + .insert({input->GetId(), + ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>( + allocator_.Adapter(kArenaAllocGraphChecker))}) + .first; + for (auto&& use : input->GetUses()) { + it->second.insert(std::addressof(use)); + } + } + + if ((input_record.GetBeforeUseNode() == input->GetUses().end()) || + (input_record.GetUseNode() == input->GetUses().end()) || + (it->second.find(std::addressof(*input_record.GetUseNode())) == it->second.end()) || + (input_record.GetUseNode()->GetIndex() != i)) { + AddError( + StringPrintf("Instruction %s:%d has an invalid iterator before use entry " + "at input %u (%s:%d).", + instruction->DebugName(), + instruction->GetId(), + static_cast<unsigned>(i), + input->DebugName(), + input->GetId())); + } } } @@ -944,8 +977,7 @@ static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* in static bool IsConstantEquivalent(const HInstruction* insn1, const HInstruction* insn2, BitVector* visited) { - if (insn1->IsPhi() && - insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { + if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { HConstInputsRef insn1_inputs = insn1->GetInputs(); HConstInputsRef insn2_inputs = insn2->GetInputs(); if (insn1_inputs.size() != insn2_inputs.size()) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index d6644f3b50..aff2358411 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -22,7 +22,7 @@ #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" #include "base/macros.h" -#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "nodes.h" namespace art HIDDEN { @@ -35,12 +35,13 @@ class GraphChecker : public HGraphDelegateVisitor { explicit GraphChecker(HGraph* graph, CodeGenerator* codegen = nullptr, const char* dump_prefix = "art::GraphChecker: ") - : HGraphDelegateVisitor(graph), - errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), - dump_prefix_(dump_prefix), - allocator_(graph->GetArenaStack()), - seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker), - codegen_(codegen) { + : HGraphDelegateVisitor(graph), + errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), + dump_prefix_(dump_prefix), + allocator_(graph->GetArenaStack()), + seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker), + uses_per_instruction_(allocator_.Adapter(kArenaAllocGraphChecker)), + codegen_(codegen) { seen_ids_.ClearAllBits(); } @@ -107,7 +108,7 @@ class GraphChecker : public HGraphDelegateVisitor { } } - protected: + private: // Report a new error. void AddError(const std::string& error) { errors_.push_back(error); @@ -118,7 +119,6 @@ class GraphChecker : public HGraphDelegateVisitor { // Errors encountered while checking the graph. ArenaVector<std::string> errors_; - private: void VisitReversePostOrder(); // Checks that the graph's flags are set correctly. @@ -129,6 +129,13 @@ class GraphChecker : public HGraphDelegateVisitor { ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; + // As part of VisitInstruction, we verify that the instruction's input_record is present in the + // corresponding input's GetUses. If an instruction is used in many places (e.g. 200K+ uses), the + // linear search through GetUses is too slow. 
We can use bookkeeping to search in a set, instead + // of a list. + ScopedArenaSafeMap<int, ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>> + uses_per_instruction_; + // Used to access target information. CodeGenerator* codegen_; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 73bdd1e223..bd33fde907 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -610,6 +610,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override { + VisitVecOperation(vec_mem_operation); StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString(); } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 5a4478dc14..91be79f8ec 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -180,7 +180,7 @@ bool HInliner::Run() { for (HBasicBlock* block : blocks) { for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); - HInvoke* call = instruction->AsInvoke(); + HInvoke* call = instruction->AsInvokeOrNull(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) { if (honor_noinline_directives) { @@ -702,12 +702,14 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT( // Walk over the class descriptors and look up the actual classes. // If we cannot find a type we return kInlineCacheMissingTypes. ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + Thread* self = Thread::Current(); for (const dex::TypeIndex& type_index : dex_pc_data.classes) { const DexFile* dex_file = caller_compilation_unit_.GetDexFile(); const char* descriptor = pci->GetTypeDescriptor(dex_file, type_index); - ObjPtr<mirror::ClassLoader> class_loader = caller_compilation_unit_.GetClassLoader().Get(); - ObjPtr<mirror::Class> clazz = class_linker->LookupResolvedType(descriptor, class_loader); + ObjPtr<mirror::Class> clazz = + class_linker->FindClass(self, descriptor, caller_compilation_unit_.GetClassLoader()); if (clazz == nullptr) { + self->ClearException(); // Clean up the exception left by type resolution. 
VLOG(compiler) << "Could not find class from inline cache in AOT mode " << invoke_instruction->GetMethodReference().PrettyMethod() << " : " diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index fee9091145..fd599f789e 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1364,8 +1364,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, method_reference, resolved_method, resolved_method_reference, - proto_idx, - !graph_->IsDebuggable()); + proto_idx); if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) { return false; } @@ -2365,9 +2364,9 @@ void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg, second = LoadLocal(second_vreg_or_constant, type); } - if (!second_is_constant - || (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) - || (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { + if (!second_is_constant || + (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) || + (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { second = new (allocator_) HDivZeroCheck(second, dex_pc); AppendInstruction(second); } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0c2fd5de56..0e2a62226f 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -1050,51 +1050,60 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* b = condition->InputAt(1); DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); - // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are compatible integral types (resulting MIN/MAX/ABS - // type will be int or long, like the condition). Replacements are general, - // but assume conditions prefer constants on the right. if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { - // Allow a < 100 ? max(a, -100) : .. - // or a > -100 ? min(a, 100) : .. - // to use min/max instead of a to detect nested min/max expressions. - HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); - if (new_a != nullptr) { - a = new_a; - } - // Try to replace typical integral MIN/MAX/ABS constructs. - if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && - ((a == true_value && b == false_value) || - (b == true_value && a == false_value))) { - // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) - // or a > b ? a : b (MAX) or a > b ? b : a (MIN). - bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); - replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); - } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || - ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { - bool negLeft = (cmp == kCondLT || cmp == kCondLE); - HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); - HInstruction* not_negated = negLeft ? false_value : true_value; - if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { - // Found a < 0 ? -a : a - // or a > 0 ? a : -a - // which can be replaced by ABS(a). 
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); + if (cmp == kCondEQ || cmp == kCondNE) { + // Turns + // * Select[a, b, EQ(a,b)] / Select[a, b, EQ(b,a)] into a + // * Select[a, b, NE(a,b)] / Select[a, b, NE(b,a)] into b + // Note that the order in EQ/NE is irrelevant. + if ((a == true_value && b == false_value) || (a == false_value && b == true_value)) { + replace_with = cmp == kCondEQ ? false_value : true_value; + } + } else { + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; } - } else if (true_value->IsSub() && false_value->IsSub()) { - HInstruction* true_sub1 = true_value->InputAt(0); - HInstruction* true_sub2 = true_value->InputAt(1); - HInstruction* false_sub1 = false_value->InputAt(0); - HInstruction* false_sub2 = false_value->InputAt(1); - if ((((cmp == kCondGT || cmp == kCondGE) && - (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || - ((cmp == kCondLT || cmp == kCondLE) && - (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && - AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a - // or a < b ? b - a : a - b - // which can be replaced by ABS(a - b) for lower precision operands a, b. - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Try to replace typical integral MIN/MAX/ABS constructs. + if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); + } + } else if (true_value->IsSub() && false_value->IsSub()) { + HInstruction* true_sub1 = true_value->InputAt(0); + HInstruction* true_sub2 = true_value->InputAt(1); + HInstruction* false_sub1 = false_value->InputAt(0); + HInstruction* false_sub2 = false_value->InputAt(1); + if ((((cmp == kCondGT || cmp == kCondGE) && + (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || + ((cmp == kCondLT || cmp == kCondLE) && + (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && + AreLowerPrecisionArgs(t_type, a, b)) { + // Found a > b ? a - b : b - a + // or a < b ? b - a : a - b + // which can be replaced by ABS(a - b) for lower precision operands a, b. 
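// Illustration only: the EQ/NE select identity exploited by the new kCondEQ/kCondNE branch
// above, checked with plain C++ ternaries standing in for HSelect (select_eq_ne_sketch is a
// made-up name, not ART code). Whenever the select's two values are exactly the two operands
// of its EQ/NE condition, the result is always one fixed operand, so both the comparison and
// the select can be removed.
#include <cassert>

namespace select_eq_ne_sketch {

inline void Check(int a, int b) {
  assert(((a == b) ? a : b) == b);  // equal: both arms coincide; not equal: picks b
  assert(((a == b) ? b : a) == a);
  assert(((a != b) ? a : b) == a);  // equal: both arms coincide; not equal: picks a
  assert(((a != b) ? b : a) == b);
}

}  // namespace select_eq_ne_sketch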
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + } } } } @@ -1456,24 +1465,26 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { } } - HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); - if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) { - // Replace code looking like - // NEG tmp, b - // ADD dst, a, tmp - // with - // SUB dst, a, b - // We do not perform the optimization if the input negation has environment - // uses or multiple non-environment uses as it could lead to worse code. In - // particular, we do not want the live range of `b` to be extended if we are - // not sure the initial 'NEG' instruction can be removed. - HInstruction* other = left_is_neg ? right : left; - HSub* sub = - new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); - RecordSimplification(); - neg->GetBlock()->RemoveInstruction(neg); - return; + if (left_is_neg != right_is_neg) { + HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); + if (neg->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NEG tmp, b + // ADD dst, a, tmp + // with + // SUB dst, a, b + // We do not perform the optimization if the input negation has environment + // uses or multiple non-environment uses as it could lead to worse code. In + // particular, we do not want the live range of `b` to be extended if we are + // not sure the initial 'NEG' instruction can be removed. + HInstruction* other = left_is_neg ? right : left; + HSub* sub = + new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); + neg->GetBlock()->RemoveInstruction(neg); + return; + } } if (TryReplaceWithRotate(instruction)) { @@ -1676,7 +1687,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { HInstruction* input_two = condition->InputAt(1); HLoadClass* load_class = input_one->IsLoadClass() ? input_one->AsLoadClass() - : input_two->AsLoadClass(); + : input_two->AsLoadClassOrNull(); if (load_class == nullptr) { return false; } @@ -1688,8 +1699,8 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { } HInstanceFieldGet* field_get = (load_class == input_one) - ? input_two->AsInstanceFieldGet() - : input_one->AsInstanceFieldGet(); + ? input_two->AsInstanceFieldGetOrNull() + : input_one->AsInstanceFieldGetOrNull(); if (field_get == nullptr) { return false; } @@ -2240,6 +2251,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { } if (left->IsAdd()) { + // Cases (x + y) - y = x, and (x + y) - x = y. // Replace code patterns looking like // ADD dst1, x, y ADD dst1, x, y // SUB dst2, dst1, y SUB dst2, dst1, x @@ -2248,14 +2260,75 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { // SUB instruction is not needed in this case, we may use // one of inputs of ADD instead. // It is applicable to integral types only. 
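// Illustration only: the algebraic identities behind the Add/Sub rewrites in this hunk,
// written out over uint32_t so the wrap-around arithmetic matches the integral-only
// restriction noted above (sub_identities_sketch is a made-up name, not ART code).
#include <cassert>
#include <cstdint>

namespace sub_identities_sketch {

inline void Check(uint32_t x, uint32_t y) {
  assert((x + y) - y == x);       // Sub(Add(x, y), y)  -> x
  assert((x + y) - x == y);       // Sub(Add(x, y), x)  -> y
  assert(y - (x + y) == -x);      // Sub(y, Add(x, y))  -> Neg(x)
  assert(x - (x + y) == -y);      // Sub(x, Add(x, y))  -> Neg(y)
  assert((x - y) - x == -y);      // Sub(Sub(x, y), x)  -> Neg(y)
  assert(x - (x - y) == y);       // Sub(x, Sub(x, y))  -> y
}

}  // namespace sub_identities_sketch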
+ HAdd* add = left->AsAdd(); DCHECK(DataType::IsIntegralType(type)); - if (left->InputAt(1) == right) { - instruction->ReplaceWith(left->InputAt(0)); + if (add->GetRight() == right) { + instruction->ReplaceWith(add->GetLeft()); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else if (add->GetLeft() == right) { + instruction->ReplaceWith(add->GetRight()); RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); return; - } else if (left->InputAt(0) == right) { - instruction->ReplaceWith(left->InputAt(1)); + } + } else if (right->IsAdd()) { + // Cases y - (x + y) = -x, and x - (x + y) = -y. + // Replace code patterns looking like + // ADD dst1, x, y ADD dst1, x, y + // SUB dst2, y, dst1 SUB dst2, x, dst1 + // with + // ADD dst1, x, y ADD dst1, x, y + // NEG x NEG y + // SUB instruction is not needed in this case, we may use + // one of inputs of ADD instead with a NEG. + // It is applicable to integral types only. + HAdd* add = right->AsAdd(); + DCHECK(DataType::IsIntegralType(type)); + if (add->GetRight() == left) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetLeft()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } else if (add->GetLeft() == left) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetRight()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } else if (left->IsSub()) { + // Case (x - y) - x = -y. + // Replace code patterns looking like + // SUB dst1, x, y + // SUB dst2, dst1, x + // with + // SUB dst1, x, y + // NEG y + // The second SUB is not needed in this case, we may use the second input of the first SUB + // instead with a NEG. + // It is applicable to integral types only. + HSub* sub = left->AsSub(); + DCHECK(DataType::IsIntegralType(type)); + if (sub->GetLeft() == right) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(sub->GetType(), sub->GetRight()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } else if (right->IsSub()) { + // Case x - (x - y) = y. + // Replace code patterns looking like + // SUB dst1, x, y + // SUB dst2, x, dst1 + // with + // SUB dst1, x, y + // The second SUB is not needed in this case, we may use the second input of the first SUB. + // It is applicable to integral types only. + HSub* sub = right->AsSub(); + DCHECK(DataType::IsIntegralType(type)); + if (sub->GetLeft() == left) { + instruction->ReplaceWith(sub->GetRight()); RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); return; @@ -3215,7 +3288,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); // Variable names as described above. - HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant(); + HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstantOrNull(); if (const2 == nullptr) { return false; } @@ -3231,7 +3304,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( } left = y->GetLeft(); - HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant(); + HConstant* const1 = left->IsConstant() ? 
left->AsConstant() : y->GetRight()->AsConstantOrNull(); if (const1 == nullptr) { return false; } diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index ddc3a867b8..01489f8bcb 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -54,7 +54,7 @@ inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) { // t3 = Sub(*, t2) inline bool IsSubRightSubLeftShl(HSub *sub) { HInstruction* right = sub->GetRight(); - return right->IsSub() && right->AsSub()->GetLeft()->IsShl();; + return right->IsSub() && right->AsSub()->GetLeft()->IsShl(); } } // namespace helpers diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 774deec438..8357e57c1f 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -27,6 +27,7 @@ #include "gc/space/image_space.h" #include "image-inl.h" #include "intrinsic_objects.h" +#include "intrinsics_list.h" #include "nodes.h" #include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" @@ -43,10 +44,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { case Intrinsics::k ## Name: \ os << # Name; \ break; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef STATIC_INTRINSICS_LIST -#undef VIRTUAL_INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return os; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 893cd04411..b6c7e1b997 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "code_generator.h" +#include "intrinsics_list.h" #include "nodes.h" #include "optimization.h" #include "parallel_move_resolver.h" @@ -48,9 +49,7 @@ class IntrinsicVisitor : public ValueObject { case Intrinsics::k ## Name: \ Visit ## Name(invoke); \ return; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Do not put a default case. That way the compiler will complain if we missed a case. @@ -60,11 +59,8 @@ class IntrinsicVisitor : public ValueObject { // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, ...) \ - virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ - } -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + virtual void Visit##Name([[maybe_unused]] HInvoke* invoke) {} + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS static void MoveArguments(HInvoke* invoke, @@ -254,11 +250,9 @@ class VarHandleOptimizations : public IntrinsicOptimizations { // intrinsic to exploit e.g. no side-effects or exceptions, but otherwise not handled // by this architecture-specific intrinsics code generator. Eventually it is implemented // as a true method call. 
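The UNIMPLEMENTED_INTRINSIC macro reworked just below is part of this change's wider move from ATTRIBUTE_UNUSED to the standard [[maybe_unused]] attribute. A self-contained sketch of what one instantiation roughly expands to after the change (the Foo architecture and the intrinsic name are invented for illustration):

class HInvoke;  // forward declaration only; never dereferenced here

struct IntrinsicLocationsBuilderFoo { void VisitMathAbsInt(HInvoke* invoke); };
struct IntrinsicCodeGeneratorFoo { void VisitMathAbsInt(HInvoke* invoke); };

// Rough expansion of UNIMPLEMENTED_INTRINSIC(Foo, MathAbsInt):
void IntrinsicLocationsBuilderFoo::VisitMathAbsInt([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicCodeGeneratorFoo::VisitMathAbsInt([[maybe_unused]] HInvoke* invoke) {}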
-#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \ -void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ -} \ -void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ -} +#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \ + void IntrinsicLocationsBuilder##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} \ + void IntrinsicCodeGenerator##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} // Defines a list of unreached intrinsics: that is, method calls that are recognized as // an intrinsic, and then always converted into HIR instructions before they reach any diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d2dbaa32e3..2ec2134fb1 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2582,7 +2582,7 @@ static constexpr int32_t kSystemArrayCopyCharThreshold = 192; static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, uint32_t at, HInstruction* input) { - HIntConstant* const_input = input->AsIntConstant(); + HIntConstant* const_input = input->AsIntConstantOrNull(); if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) { locations->SetInAt(at, Location::RequiresRegister()); } else { @@ -2593,8 +2593,8 @@ static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -2605,7 +2605,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { // The length must be >= 0 and not so long that we would (currently) prefer libcore's // native implementation. - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0 || len > kSystemArrayCopyCharThreshold) { @@ -2903,8 +2903,8 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -2914,7 +2914,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // The length must be >= 0. 
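The length lookup that follows is one of many call sites in this change converted from AsIntConstant() to AsIntConstantOrNull(), used wherever the input may not actually be a constant. A minimal mock of that convention as inferred from these call sites (hypothetical types, not ART's real class hierarchy; the plain As* accessors appear to be reserved for callers that already know the kind):

#include <cstdint>

struct HInstruction {
  virtual ~HInstruction() = default;
  virtual bool IsIntConstant() const { return false; }
};
struct HIntConstant : HInstruction {
  explicit HIntConstant(int32_t v) : value(v) {}
  bool IsIntConstant() const override { return true; }
  int32_t value;
};
// The *OrNull() form probes and may return nullptr instead of asserting the kind.
HIntConstant* AsIntConstantOrNull(HInstruction* instruction) {
  return instruction->IsIntConstant() ? static_cast<HIntConstant*>(instruction) : nullptr;
}

// Caller pattern mirroring the converted sites: probe, then branch on nullptr.
bool IsKnownNonNegative(HInstruction* pos) {
  HIntConstant* c = AsIntConstantOrNull(pos);
  return c != nullptr && c->value >= 0;
}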
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0 || len >= kSystemArrayCopyThreshold) { @@ -3009,8 +3009,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ B(intrinsic_slow_path->GetEntryLabel(), eq); } // Checked when building locations. - DCHECK(!optimizations.GetDestinationIsSource() - || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); + DCHECK(!optimizations.GetDestinationIsSource() || + (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); @@ -3676,7 +3676,7 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) { if (!codegen_->GetInstructionSetFeatures().HasCRC()) { @@ -4711,8 +4711,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke, LocationFrom(target.object), method.X(), ArtField::DeclaringClassOffset().Int32Value(), - /*fixup_label=*/ nullptr, - gCompilerReadBarrierOption); + /*fixup_label=*/nullptr, + GetCompilerReadBarrierOption()); } } } else { diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index a0ccf87f7b..b20cea65f4 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace vixl { namespace aarch64 { @@ -47,9 +48,7 @@ class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -72,9 +71,7 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 266b5bc799..d31593cf9f 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -1252,9 +1252,9 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { return; } - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { locations->SetInAt(1, Location::RequiresRegister()); @@ -2653,7 +2653,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) { CreateIntIntToIntSlowPathCallLocations(allocator_, invoke); @@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, LocationFrom(target.object), method, ArtField::DeclaringClassOffset().Int32Value(), - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); } } } else { diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 54475bcc7e..f517d21c9d 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "utils/arm/assembler_arm_vixl.h" namespace art HIDDEN { @@ -36,9 +37,7 @@ class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -63,9 +62,7 @@ class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc new file mode 100644 index 0000000000..668b3862ad --- /dev/null +++ b/compiler/optimizing/intrinsics_riscv64.cc @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_riscv64.h" + +#include "code_generator_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + return res->Intrinsified(); +} + +Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() { + return codegen_->GetAssembler(); +} + +#define __ GetAssembler()-> + +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* 
invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>()); + __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); + __ Snez(out, out); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>()); + __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); + __ Snez(out, out); +} + +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +template <typename EmitOp> +void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); }); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +template <typename EmitOp> +void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* 
invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); }); +} + +template <typename EmitOp> +void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + // There is no 32-bit reverse bytes instruction. + __ Rev8(rd, rs1); + __ Srai(rd, rd, 32); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Rev8(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + // There is no 16-bit reverse bytes instruction. 
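The sequence emitted just below leans on Rev8 reversing all eight bytes of the 64-bit register: the two interesting bytes land at the top, and the arithmetic shift by 48 both moves them back down and restores the sign extension a Java short needs (the 32-bit case above uses the same idea with a shift of 32). A rough C++ model of the 16-bit sequence (function name invented; __builtin_bswap64 stands in for Rev8):

#include <cstdint>

int64_t ShortReverseBytesModel(int64_t rs1) {
  uint64_t reversed = __builtin_bswap64(static_cast<uint64_t>(rs1));  // Rev8 rd, rs1
  return static_cast<int64_t>(reversed) >> 48;                        // Srai rd, rd, 48
}
// E.g. ShortReverseBytesModel(0x1280) == -32750, matching Short.reverseBytes((short) 0x1280).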
+ __ Rev8(rd, rs1); + __ Srai(rd, rd, 48); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + __ Clzw(tmp, rs1); + __ Li(tmp2, INT64_C(-0x80000000)); + __ Srlw(tmp2, tmp2, tmp); + __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero. + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + __ Clz(tmp, rs1); + __ Li(tmp2, INT64_C(-0x8000000000000000)); + __ Srl(tmp2, tmp2, tmp); + __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero. 
+ }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ NegW(tmp, rs1); + __ And(rd, rs1, tmp); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Neg(tmp, rs1); + __ And(rd, rs1, tmp); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); }); +} + +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED + +UNREACHABLE_INTRINSICS(RISCV64) + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_riscv64.h b/compiler/optimizing/intrinsics_riscv64.h new file mode 100644 index 0000000000..49c057de2b --- /dev/null +++ b/compiler/optimizing/intrinsics_riscv64.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ + +#include "base/macros.h" +#include "intrinsics.h" +#include "intrinsics_list.h" + +namespace art HIDDEN { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace riscv64 { + +class CodeGeneratorRISCV64; +class Riscv64Assembler; + +class IntrinsicLocationsBuilderRISCV64 final : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderRISCV64(ArenaAllocator* allocator, + CodeGeneratorRISCV64* codegen) + : allocator_(allocator), codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, ...) \ + void Visit##Name(HInvoke* invoke) override; + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* const allocator_; + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderRISCV64); +}; + +class IntrinsicCodeGeneratorRISCV64 final : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorRISCV64(CodeGeneratorRISCV64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, ...) \ + void Visit##Name(HInvoke* invoke); + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef OPTIMIZING_INTRINSICS + + private: + Riscv64Assembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorRISCV64); +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index d2072201f8..02f312e74e 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -394,7 +394,6 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { } HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -774,9 +773,9 @@ void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { static void CreateSystemArrayCopyLocations(HInvoke* invoke) { // We need at least two of the positions or length to be an integer constant, // or else we won't have enough free registers. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); int num_constants = ((src_pos != nullptr) ? 1 : 0) @@ -1205,7 +1204,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, HInstruction* code_point = invoke->InputAt(1); if (code_point->IsIntConstant()) { if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > - std::numeric_limits<uint16_t>::max()) { + std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. 
We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); @@ -1445,7 +1444,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register obj = locations->InAt(0).AsRegister<Register>(); Location srcBegin = locations->InAt(1); int srcBegin_value = - srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; + srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; Register srcEnd = locations->InAt(2).AsRegister<Register>(); Register dst = locations->InAt(3).AsRegister<Register>(); Register dstBegin = locations->InAt(4).AsRegister<Register>(); @@ -3504,7 +3503,7 @@ void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) { LocationSummary* locations = new (allocator_) LocationSummary(invoke, @@ -3781,7 +3780,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, Location::RegisterLocation(temp), Address(temp, declaring_class_offset), /* fixup_label= */ nullptr, - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); return temp; } @@ -3860,7 +3859,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { Address field_addr(ref, offset, TIMES_1, 0); // Load the value from the field - if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) { + if (type == DataType::Type::kReference && GetCompilerReadBarrierOption() == kWithReadBarrier) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, ref, field_addr, /* needs_null_check= */ false); } else if (type == DataType::Type::kInt64 && diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 77c236d244..fc2f0e3fbd 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace art HIDDEN { @@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 9d0d5f155e..842af6b73f 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -222,34 +222,34 @@ static void GenIsInfinite(LocationSummary* locations, double kPositiveInfinity = std::numeric_limits<double>::infinity(); double kNegativeInfinity = -1 * kPositiveInfinity; - __ xorq(output, output); - __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity)); - __ j(kNotEqual, &done1); - __ j(kParityEven, &done2); - __ movq(output, Immediate(1)); - __ jmp(&done2); - __ Bind(&done1); - __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity)); - __ j(kNotEqual, &done2); - __ j(kParityEven, &done2); - __ movq(output, Immediate(1)); - __ Bind(&done2); + __ xorq(output, output); + __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity)); + __ j(kNotEqual, &done1); + __ j(kParityEven, &done2); + __ movq(output, Immediate(1)); + __ jmp(&done2); + __ Bind(&done1); + __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity)); + __ j(kNotEqual, &done2); + __ j(kParityEven, &done2); + __ movq(output, Immediate(1)); + __ Bind(&done2); } else { float kPositiveInfinity = std::numeric_limits<float>::infinity(); float kNegativeInfinity = -1 * kPositiveInfinity; - __ xorl(output, output); - __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity)); - __ j(kNotEqual, &done1); - __ j(kParityEven, &done2); - __ movl(output, Immediate(1)); - __ jmp(&done2); - __ Bind(&done1); - __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity)); - __ j(kNotEqual, &done2); - __ j(kParityEven, &done2); - __ movl(output, Immediate(1)); - __ Bind(&done2); + __ xorl(output, output); + __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity)); + __ j(kNotEqual, &done1); + __ j(kParityEven, &done2); + __ movl(output, Immediate(1)); + __ jmp(&done2); + __ Bind(&done1); + __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity)); + __ j(kNotEqual, &done2); + __ j(kParityEven, &done2); + __ movl(output, Immediate(1)); + __ Bind(&done2); } } @@ -617,8 +617,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { static void CreateSystemArrayCopyLocations(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -628,7 +628,7 @@ static void CreateSystemArrayCopyLocations(HInvoke* invoke) { } // The length must be > 0. 
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0) { @@ -1424,7 +1424,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, HInstruction* code_point = invoke->InputAt(1); if (code_point->IsIntConstant()) { if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > - std::numeric_limits<uint16_t>::max()) { + std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); @@ -1655,7 +1655,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); Location srcBegin = locations->InAt(1); int srcBegin_value = - srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; + srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>(); CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>(); CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); @@ -1871,7 +1871,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { static void GenUnsafeGet(HInvoke* invoke, DataType::Type type, - bool is_volatile ATTRIBUTE_UNUSED, + [[maybe_unused]] bool is_volatile, CodeGeneratorX86_64* codegen) { X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); @@ -3249,7 +3249,7 @@ void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) { LocationSummary* locations = @@ -3770,8 +3770,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke, instr_codegen->GenerateGcRootFieldLoad(invoke, Location::RegisterLocation(target.object), Address(method, ArtField::DeclaringClassOffset()), - /*fixup_label=*/ nullptr, - gCompilerReadBarrierOption); + /*fixup_label=*/nullptr, + GetCompilerReadBarrierOption()); } } } else { diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 59fe815a94..d0ee6f622d 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace art HIDDEN { @@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. 
Returns whether @@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/jit_patches_arm64.cc b/compiler/optimizing/jit_patches_arm64.cc new file mode 100644 index 0000000000..76ba182acb --- /dev/null +++ b/compiler/optimizing/jit_patches_arm64.cc @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generation_data.h" +#include "gc_root.h" +#include "jit_patches_arm64.h" + +namespace art HIDDEN { + +namespace arm64 { + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateUint32Literal( + uint32_t value) { + return uint32_literals_.GetOrCreate( + value, + [this, value]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(value); + }); +} + +vixl::aarch64::Literal<uint64_t>* JitPatchesARM64::DeduplicateUint64Literal( + uint64_t value) { + return uint64_literals_.GetOrCreate( + value, + [this, value]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint64_t>(value); + }); +} + +static void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + vixl::aarch64::Literal<uint32_t>* literal, + uint64_t index_in_table) { + uint32_t literal_offset = literal->GetOffset(); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + uint8_t* data = code + literal_offset; + reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void JitPatchesARM64::EmitJitRootPatches( + uint8_t* code, + const uint8_t* roots_data, + const CodeGenerationData& code_generation_data) const { + for (const auto& entry : jit_string_patches_) { + const StringReference& string_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + uint64_t index_in_table = code_generation_data.GetJitStringRootIndex(string_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } + for (const auto& entry : jit_class_patches_) { + const TypeReference& type_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + uint64_t index_in_table = code_generation_data.GetJitClassRootIndex(type_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateBootImageAddressLiteral( + uint64_t address) { + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + 
Handle<mirror::String> handle, + CodeGenerationData* code_generation_data) { + code_generation_data->ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); + }); +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitClassLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle, + CodeGenerationData* code_generation_data) { + code_generation_data->ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); + }); +} + +} // namespace arm64 +} // namespace art diff --git a/compiler/optimizing/jit_patches_arm64.h b/compiler/optimizing/jit_patches_arm64.h new file mode 100644 index 0000000000..f928723f58 --- /dev/null +++ b/compiler/optimizing/jit_patches_arm64.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ +#define ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dex/dex_file.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" +#include "handle.h" +#include "mirror/class.h" +#include "mirror/string.h" +#include "utils/arm64/assembler_arm64.h" + +// TODO(VIXL): Make VIXL compile with -Wshadow. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#pragma GCC diagnostic pop + +namespace art HIDDEN { + +class CodeGenerationData; + +namespace arm64 { + +/** + * Helper for emitting string or class literals into JIT generated code, + * which can be shared between different compilers. 
+ */ +class JitPatchesARM64 { + public: + JitPatchesARM64(Arm64Assembler* assembler, ArenaAllocator* allocator) : + assembler_(assembler), + uint32_literals_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + allocator->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + allocator->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + allocator->Adapter(kArenaAllocCodeGenerator)) { + } + + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; + using StringToLiteralMap = ArenaSafeMap<StringReference, + vixl::aarch64::Literal<uint32_t>*, + StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, + vixl::aarch64::Literal<uint32_t>*, + TypeReferenceValueComparator>; + + vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); + vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); + vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle, + CodeGenerationData* code_generation_data); + vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle, + CodeGenerationData* code_generation_data); + + void EmitJitRootPatches(uint8_t* code, + const uint8_t* roots_data, + const CodeGenerationData& code_generation_data) const; + + Arm64Assembler* GetAssembler() const { return assembler_; } + vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } + + private: + Arm64Assembler* assembler_; + // Deduplication map for 32-bit literals, used for JIT for boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for JIT for method address or method code. + Uint64ToLiteralMap uint64_literals_; + // Patches for string literals in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class literals in JIT compiled code. 
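For context on how these literal maps are consumed: EmitJitRootPatches in the .cc file above walks them and rewrites each recorded 32-bit literal slot so a later literal load yields the absolute address of the matching entry in the JIT roots table. A standalone sketch of that patching arithmetic (simplified, invented names; kGcRootSize stands in for sizeof(GcRoot<mirror::Object>), assumed to be 4 here):

#include <cstdint>
#include <cstring>

constexpr uint64_t kGcRootSize = 4;  // assumed size of a compressed GC root

void PatchRootSlot(uint8_t* code, const uint8_t* roots_data,
                   uint32_t literal_offset, uint64_t index_in_table) {
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * kGcRootSize;
  uint32_t narrowed = static_cast<uint32_t>(address);  // the real code dchecks this fits
  std::memcpy(code + literal_offset, &narrowed, sizeof(narrowed));  // patch the literal slot
}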
+ TypeToLiteralMap jit_class_patches_; +}; + +} // namespace arm64 + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 01daa23511..6f4f2b6cf6 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -55,6 +55,7 @@ void LinearizeTest::TestCode(const std::vector<uint16_t>& data, } TEST_F(LinearizeTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -80,6 +81,7 @@ TEST_F(LinearizeTest, CFG1) { } TEST_F(LinearizeTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -105,6 +107,7 @@ TEST_F(LinearizeTest, CFG2) { } TEST_F(LinearizeTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -132,6 +135,7 @@ TEST_F(LinearizeTest, CFG3) { } TEST_F(LinearizeTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); /* Structure of this graph (+ are back edges) // Block0 // | @@ -162,6 +166,7 @@ TEST_F(LinearizeTest, CFG4) { } TEST_F(LinearizeTest, CFG5) { + TEST_DISABLED_FOR_RISCV64(); /* Structure of this graph (+ are back edges) // Block0 // | @@ -192,6 +197,7 @@ TEST_F(LinearizeTest, CFG5) { } TEST_F(LinearizeTest, CFG6) { + TEST_DISABLED_FOR_RISCV64(); // Block0 // | // Block1 @@ -218,6 +224,7 @@ TEST_F(LinearizeTest, CFG6) { } TEST_F(LinearizeTest, CFG7) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index fb1a23eef4..7e488ba41d 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -47,6 +47,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { } TEST_F(LiveRangesTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * return 0; @@ -81,6 +82,7 @@ TEST_F(LiveRangesTest, CFG1) { } TEST_F(LiveRangesTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -125,6 +127,7 @@ TEST_F(LiveRangesTest, CFG2) { } TEST_F(LiveRangesTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -194,6 +197,7 @@ TEST_F(LiveRangesTest, CFG3) { } TEST_F(LiveRangesTest, Loop1) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -270,6 +274,7 @@ TEST_F(LiveRangesTest, Loop1) { } TEST_F(LiveRangesTest, Loop2) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -341,6 +346,7 @@ TEST_F(LiveRangesTest, Loop2) { } TEST_F(LiveRangesTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b421cf9e6..6af07aea4e 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -70,6 +70,7 @@ void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expec } TEST_F(LivenessTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" " live in: (0)\n" @@ -93,6 +94,7 @@ TEST_F(LivenessTest, CFG1) { } TEST_F(LivenessTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" " live in: (0)\n" @@ -115,6 +117,7 @@ TEST_F(LivenessTest, CFG2) { } TEST_F(LivenessTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" // entry block " 
live in: (000)\n" @@ -144,6 +147,7 @@ TEST_F(LivenessTest, CFG3) { } TEST_F(LivenessTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); // var a; // if (0 == 0) { // a = 5; @@ -192,6 +196,7 @@ TEST_F(LivenessTest, CFG4) { } TEST_F(LivenessTest, CFG5) { + TEST_DISABLED_FOR_RISCV64(); // var a = 0; // if (0 == 0) { // } else { @@ -237,6 +242,7 @@ TEST_F(LivenessTest, CFG5) { } TEST_F(LivenessTest, Loop1) { + TEST_DISABLED_FOR_RISCV64(); // Simple loop with one preheader and one back edge. // var a = 0; // while (a == a) { @@ -283,6 +289,7 @@ TEST_F(LivenessTest, Loop1) { } TEST_F(LivenessTest, Loop3) { + TEST_DISABLED_FOR_RISCV64(); // Test that the returned value stays live in a preceding loop. // var a = 0; // while (a == a) { @@ -330,6 +337,7 @@ TEST_F(LivenessTest, Loop3) { TEST_F(LivenessTest, Loop4) { + TEST_DISABLED_FOR_RISCV64(); // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. // var a = 0; @@ -382,6 +390,7 @@ TEST_F(LivenessTest, Loop4) { } TEST_F(LivenessTest, Loop5) { + TEST_DISABLED_FOR_RISCV64(); // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: @@ -438,6 +447,7 @@ TEST_F(LivenessTest, Loop5) { } TEST_F(LivenessTest, Loop6) { + TEST_DISABLED_FOR_RISCV64(); // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2) const char* expected = @@ -489,6 +499,7 @@ TEST_F(LivenessTest, Loop6) { TEST_F(LivenessTest, Loop7) { + TEST_DISABLED_FOR_RISCV64(); // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = @@ -543,6 +554,7 @@ TEST_F(LivenessTest, Loop7) { } TEST_F(LivenessTest, Loop8) { + TEST_DISABLED_FOR_RISCV64(); // var a = 0; // while (a == a) { // a = a + a; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index f1c50ac03c..75000c8b91 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -41,7 +41,7 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, // We currently only support Add and Sub operations. return true; } - if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != idx2) { + if (idx1->GetLeastConstantLeft() != idx2) { // Cannot analyze [i+CONST1] and [j]. return true; } @@ -51,9 +51,9 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, // Since 'i' are the same in [i+CONST] and [i], // further compare [CONST] and [0]. - int64_t l1 = idx1->IsAdd() ? - idx1->GetConstantRight()->AsIntConstant()->GetValue() : - -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l1 = idx1->IsAdd() + ? idx1->GetConstantRight()->AsIntConstant()->GetValue() + : -idx1->GetConstantRight()->AsIntConstant()->GetValue(); int64_t l2 = 0; int64_t h1 = l1 + (vector_length1 - 1); int64_t h2 = l2 + (vector_length2 - 1); @@ -68,8 +68,7 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, // We currently only support Add and Sub operations. return true; } - if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != - idx2->AsBinaryOperation()->GetLeastConstantLeft()) { + if (idx1->GetLeastConstantLeft() != idx2->GetLeastConstantLeft()) { // Cannot analyze [i+CONST1] and [j+CONST2]. return true; } @@ -80,12 +79,12 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, // Since 'i' are the same in [i+CONST1] and [i+CONST2], // further compare [CONST1] and [CONST2]. - int64_t l1 = idx1->IsAdd() ? 
- idx1->GetConstantRight()->AsIntConstant()->GetValue() : - -idx1->GetConstantRight()->AsIntConstant()->GetValue(); - int64_t l2 = idx2->IsAdd() ? - idx2->GetConstantRight()->AsIntConstant()->GetValue() : - -idx2->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l1 = idx1->IsAdd() + ? idx1->GetConstantRight()->AsIntConstant()->GetValue() + : -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l2 = idx2->IsAdd() + ? idx2->GetConstantRight()->AsIntConstant()->GetValue() + : -idx2->GetConstantRight()->AsIntConstant()->GetValue(); int64_t h1 = l1 + (vector_length1 - 1); int64_t h2 = l2 + (vector_length2 - 1); return CanIntegerRangesOverlap(l1, h1, l2, h2); @@ -269,6 +268,13 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, } bool LoadStoreAnalysis::Run() { + // Currently load_store analysis can't handle predicated load/stores; specifically pairs of + // memory operations with different predicates. + // TODO: support predicated SIMD. + if (graph_->HasPredicatedSIMD()) { + return false; + } + for (HBasicBlock* block : graph_->GetReversePostOrder()) { heap_location_collector_.VisitBasicBlock(block); } diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index c46a5b9cc1..ee425454a0 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -610,6 +610,7 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitVecLoad(HVecLoad* instruction) override { + DCHECK(!instruction->IsPredicated()); HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); @@ -618,6 +619,7 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitVecStore(HVecStore* instruction) override { + DCHECK(!instruction->IsPredicated()); HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 865febbd31..8c6812f184 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -897,7 +897,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1007,7 +1007,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1131,7 +1131,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1412,7 +1412,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* 
goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1514,7 +1514,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1626,7 +1626,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1653,7 +1653,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { graph_->GetDexFile(), 0); HInstruction* goto_right = new (GetAllocator()) HGoto(); - call_right->AsInvoke()->SetRawInputAt(0, new_inst); + call_right->SetRawInputAt(0, new_inst); right->AddInstruction(write_right); right->AddInstruction(call_right); right->AddInstruction(goto_right); @@ -1813,7 +1813,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); high_left->AddInstruction(call_left); high_left->AddInstruction(goto_left); @@ -1870,7 +1870,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_low_left = new (GetAllocator()) HGoto(); - call_low_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_low_left->SetRawInputAt(0, new_inst); low_left->AddInstruction(call_low_left); low_left->AddInstruction(goto_low_left); @@ -2030,7 +2030,7 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) { HInstruction* goto_left_merge = new (GetAllocator()) HGoto(); left_phi->SetRawInputAt(0, obj_param); left_phi->SetRawInputAt(1, new_inst); - call_left->AsInvoke()->SetRawInputAt(0, left_phi); + call_left->SetRawInputAt(0, left_phi); left_merge->AddPhi(left_phi); left_merge->AddInstruction(call_left); left_merge->AddInstruction(goto_left_merge); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 9cabb12a9f..58fdd1cd05 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -1080,10 +1080,12 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitVecLoad(HVecLoad* instruction) override { + DCHECK(!instruction->IsPredicated()); VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); } void VisitVecStore(HVecStore* instruction) override { + DCHECK(!instruction->IsPredicated()); size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); VisitSetLocation(instruction, idx, instruction->GetValue()); } @@ -4041,6 +4043,13 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) { return false; } + // Currently load_store analysis can't handle predicated load/stores; specifically pairs of + // memory operations with different predicates. + // TODO: support predicated SIMD. 
+ if (graph_->HasPredicatedSIMD()) { + return false; + } + std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper( graph_, heap_location_collector, enable_partial_lse, stats_)); lse_visitor->Run(); diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc index 1ee109980f..d3cf8bfa2a 100644 --- a/compiler/optimizing/load_store_elimination_test.cc +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -573,7 +573,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) { AddVecStore(entry_block_, array_, j_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -589,7 +590,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) { AddVecStore(entry_block_, array_, i_add1_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -634,7 +636,8 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) { AddArraySet(entry_block_, array_, i_, c1); HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(load1)); @@ -668,7 +671,8 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) { // a[j] = 1; HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(array_set)); @@ -701,12 +705,13 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) { // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; AddVecStore(loop_, array_, phi_); HInstruction* vload = AddVecLoad(loop_, array_, phi_); - AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + AddVecStore(loop_, array_b, phi_, vload); // a[j] = 0; HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -740,12 +745,13 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) { // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; AddVecStore(loop_, array_, phi_); HInstruction* vload = AddVecLoad(loop_, array_, phi_); - AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + AddVecStore(loop_, array_b, phi_, vload); // x = a[j]; HInstruction* load = AddArrayGet(return_block_, array_, j_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -786,7 +792,8 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) { // down: a[i,... i + 3] = [1,...1] HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vstore2)); @@ -874,10 +881,11 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) { // a[i,... 
i + 3] = [1,...1] HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_); HInstruction* vload = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload); HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2)); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore1)); @@ -963,9 +971,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) // v = a[i,... i + 3] // array[0,... 3] = v HInstruction* vload = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -987,9 +996,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) { // v = a[0,... 3] // array[0,... 3] = v HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1063,10 +1073,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE // array[0] = v1 HInstruction* vload = AddVecLoad(loop_, array_a, phi_); HInstruction* load = AddArrayGet(loop_, array_a, phi_); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); HInstruction* store = AddArraySet(return_block_, array_, c0, load); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1094,10 +1105,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) { // array[0] = v1 HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); HInstruction* load = AddArrayGet(pre_header_, array_a, c0); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); HInstruction* store = AddArraySet(return_block_, array_, c0, load); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1126,10 +1138,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide // array[128,... 131] = v1 HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_); HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); - HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. 
+ graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -1157,10 +1170,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) { // array[128,... 131] = v1 HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0); HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0); - HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); - HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -2139,9 +2153,9 @@ TEST_F(LoadStoreEliminationTest, PartialLoadElimination) { right->AddInstruction(read_right); right->AddInstruction(goto_right); - HInstruction* phi_final = MakePhi({read_left, read_right}); + HPhi* phi_final = MakePhi({read_left, read_right}); HInstruction* return_exit = new (GetAllocator()) HReturn(phi_final); - exit->AddPhi(phi_final->AsPhi()); + exit->AddPhi(phi_final); exit->AddInstruction(return_exit); // PerformLSE expects this to be empty. @@ -5153,7 +5167,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) { CheckFinalInstruction(if_merge->InputAt(0), ComparisonPlacement::kAfterEscape); EXPECT_INS_EQ(init_set->InputAt(1), c3); ASSERT_TRUE(write_partial->InputAt(0)->IsPhi()); - EXPECT_INS_EQ(write_partial->InputAt(0)->AsPhi()->InputAt(0), init_set->InputAt(0)); + EXPECT_INS_EQ(write_partial->InputAt(0)->InputAt(0), init_set->InputAt(0)); EXPECT_INS_EQ(write_partial->InputAt(1), c4); EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return); @@ -5225,14 +5239,14 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) { HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); - HInstruction* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_); + HIf* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_); left->AddInstruction(call_left); cmp_instructions.AddSetup(left); left->AddInstruction(cmp_instructions.cmp_); left->AddInstruction(if_left); call_left->CopyEnvironmentFrom(cls->GetEnvironment()); cmp_instructions.AddEnvironment(cls->GetEnvironment()); - if (if_left->AsIf()->IfTrueSuccessor() != partial) { + if (if_left->IfTrueSuccessor() != partial) { left->SwapSuccessors(); } @@ -5381,7 +5395,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) { right->AddInstruction(write_right); right->AddInstruction(goto_right); - HInstruction* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32)); + HInstanceFieldSet* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32)); HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); breturn->AddInstruction(write_bottom); breturn->AddInstruction(return_exit); @@ -5391,7 +5405,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) { PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); - EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()); + EXPECT_TRUE(write_bottom->GetIsPredicatedSet()); EXPECT_INS_REMOVED(write_right); EXPECT_INS_RETAINED(call_left); HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn); @@ -5491,7 +5505,7 @@ TEST_F(LoadStoreEliminationTest, 
PredicatedStore2) { non_escape->AddInstruction(non_escape_goto); non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment()); - HInstruction* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32)); + HInstanceFieldSet* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32)); HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); breturn->AddInstruction(write_bottom); breturn->AddInstruction(return_exit); @@ -5501,7 +5515,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) { PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); - EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom; + EXPECT_TRUE(write_bottom->GetIsPredicatedSet()) << *write_bottom; EXPECT_INS_REMOVED(write_right); EXPECT_INS_RETAINED(call_left); HInstanceFieldSet* pred_set = FindSingleInstruction<HInstanceFieldSet>(graph_, breturn); @@ -6786,14 +6800,14 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant()); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0)); EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); @@ -6966,14 +6980,14 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant()); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0)); EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); @@ -7113,12 +7127,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); 
EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); @@ -7213,7 +7227,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { HInstruction* goto_no_escape = new (GetAllocator()) HGoto(); no_escape->AddInstruction(goto_no_escape); - HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); + HInstanceFieldSet* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); HInstruction* goto_preheader = new (GetAllocator()) HGoto(); loop_pre_header->AddInstruction(write_pre_header); loop_pre_header->AddInstruction(goto_preheader); @@ -7236,7 +7250,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); loop_if_left->AddInstruction(goto_loop_left); - HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); + HInstanceFieldSet* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); loop_if_right->AddInstruction(write_loop_right); loop_if_right->AddInstruction(goto_loop_right); @@ -7257,12 +7271,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); @@ -7272,9 +7286,9 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5); EXPECT_INS_RETAINED(write_loop_right) << *write_loop_right; - EXPECT_TRUE(write_loop_right->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_loop_right; + EXPECT_TRUE(write_loop_right->GetIsPredicatedSet()) << *write_loop_right; EXPECT_INS_RETAINED(write_pre_header) << *write_pre_header; - EXPECT_TRUE(write_pre_header->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_pre_header; + EXPECT_TRUE(write_pre_header->GetIsPredicatedSet()) << *write_pre_header; } // // ENTRY @@ -7401,12 +7415,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = 
pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); @@ -7562,7 +7576,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); @@ -8268,13 +8282,13 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle); ASSERT_NE(replacement_middle_read, nullptr); ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull); + ASSERT_EQ(2u, replacement_middle_read->GetTarget()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(0), replacement_new_inst); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(1), cnull); ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); + ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(0), c0); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(1), c11); EXPECT_INS_RETAINED(left2_write); ASSERT_TRUE(left2_write->GetIsPredicatedSet()); @@ -8285,9 +8299,9 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { ASSERT_NE(replacement_breturn_read, nullptr); ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget()); ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), c33); - HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1); + ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(0), c33); + HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->InputAt(1); ASSERT_NE(other_input->GetBlock(), nullptr) << GetParam(); ASSERT_INS_EQ(other_input, replacement_middle_read); } @@ -8423,13 
+8437,13 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle); ASSERT_NE(replacement_middle_read, nullptr); ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull); + ASSERT_EQ(2u, replacement_middle_read->GetTarget()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(0), replacement_new_inst); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(1), cnull); ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); + ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(0), c0); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(1), c11); EXPECT_INS_RETAINED(left2_call); @@ -8627,13 +8641,13 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle1); ASSERT_NE(replacement_middle1_read, nullptr); ASSERT_TRUE(replacement_middle1_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(1), cnull); + ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->InputCount()); + ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->InputAt(0), replacement_new_inst); + ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->InputAt(1), cnull); ASSERT_TRUE(replacement_middle1_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); + ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->InputAt(0), c0); + ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->InputAt(1), c11); EXPECT_INS_RETAINED(left2_call); @@ -8652,11 +8666,10 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); ASSERT_NE(replacement_breturn_read, nullptr); ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle1_read->GetTarget()); - ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), - replacement_left3_read); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1), - replacement_middle1_read); + ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi()); + ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(0), 
replacement_left3_read); + ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(1), replacement_middle1_read); EXPECT_INS_RETAINED(breturn_add1); ASSERT_INS_EQ(breturn_add1->InputAt(0), replacement_middle1_read); ASSERT_INS_EQ(breturn_add1->InputAt(1), replacement_breturn_read); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index f40b7f4f0c..4189bc4053 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -62,7 +62,7 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { } Location Location::RegisterOrInt32Constant(HInstruction* instruction) { - HConstant* constant = instruction->AsConstant(); + HConstant* constant = instruction->AsConstantOrNull(); if (constant != nullptr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (IsInt<32>(value)) { @@ -73,7 +73,7 @@ Location Location::RegisterOrInt32Constant(HInstruction* instruction) { } Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { - HConstant* constant = instruction->AsConstant(); + HConstant* constant = instruction->AsConstantOrNull(); if (constant != nullptr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (IsInt<32>(value)) { diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index 95e81533da..6163624a97 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -42,7 +42,7 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, // not cause loop peeling to happen as they either cannot be inside a loop, or by // definition cannot be loop exits (unconditional instructions), or are not beneficial for // the optimization. - HIf* hif = block->GetLastInstruction()->AsIf(); + HIf* hif = block->GetLastInstruction()->AsIfOrNull(); if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { analysis_results->invariant_exits_num_++; } @@ -259,7 +259,7 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper { case HInstruction::InstructionKind::kVecReplicateScalar: return 2; case HInstruction::InstructionKind::kVecExtractScalar: - return 1; + return 1; case HInstruction::InstructionKind::kVecReduce: return 4; case HInstruction::InstructionKind::kVecNeg: diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index cec00fecf4..cd8f00588d 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -148,13 +148,15 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // // Returns 'true' by default, should be overridden by particular target loop helper. virtual bool IsLoopNonBeneficialForScalarOpts( - LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + [[maybe_unused]] LoopAnalysisInfo* loop_analysis_info) const { + return true; + } // Returns optimal scalar unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. virtual uint32_t GetScalarUnrollingFactor( - const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + [[maybe_unused]] const LoopAnalysisInfo* analysis_info) const { return LoopAnalysisInfo::kNoUnrollingFactor; } @@ -166,17 +168,17 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // Returns whether it is beneficial to fully unroll the loop. // // Returns 'false' by default, should be overridden by particular target loop helper. 
- virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + virtual bool IsFullUnrollingBeneficial([[maybe_unused]] LoopAnalysisInfo* analysis_info) const { return false; } // Returns optimal SIMD unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. - virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, - int64_t trip_count ATTRIBUTE_UNUSED, - uint32_t max_peel ATTRIBUTE_UNUSED, - uint32_t vector_length ATTRIBUTE_UNUSED) const { + virtual uint32_t GetSIMDUnrollingFactor([[maybe_unused]] HBasicBlock* block, + [[maybe_unused]] int64_t trip_count, + [[maybe_unused]] uint32_t max_peel, + [[maybe_unused]] uint32_t vector_length) const { return LoopAnalysisInfo::kNoUnrollingFactor; } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7a52502562..f6d69ca789 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -366,8 +366,8 @@ static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; } -// Insert an instruction. -static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { +// Insert an instruction at the end of the block, with safe checks. +inline HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); DCHECK(instruction != nullptr); block->InsertInstructionBefore(instruction, block->GetLastInstruction()); @@ -418,7 +418,7 @@ static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { ++it; if (true_succ->Dominates(user_block)) { user->ReplaceInput(graph->GetIntConstant(1), index); - } else if (false_succ->Dominates(user_block)) { + } else if (false_succ->Dominates(user_block)) { user->ReplaceInput(graph->GetIntConstant(0), index); } } @@ -453,6 +453,54 @@ static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { return type; } +// Returns whether the loop is of a diamond structure: +// +// header <----------------+ +// | | +// diamond_hif | +// / \ | +// diamond_true diamond_false | +// \ / | +// back_edge | +// | | +// +---------------------+ +static bool HasLoopDiamondStructure(HLoopInformation* loop_info) { + HBasicBlock* header = loop_info->GetHeader(); + if (loop_info->NumberOfBackEdges() != 1 || header->GetSuccessors().size() != 2) { + return false; + } + HBasicBlock* header_succ_0 = header->GetSuccessors()[0]; + HBasicBlock* header_succ_1 = header->GetSuccessors()[1]; + HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ? 
+ header_succ_0 : + header_succ_1; + if (!diamond_top->GetLastInstruction()->IsIf()) { + return false; + } + + HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf(); + HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor(); + HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor(); + + if (diamond_true->GetSuccessors().size() != 1 || diamond_false->GetSuccessors().size() != 1) { + return false; + } + + HBasicBlock* back_edge = diamond_true->GetSingleSuccessor(); + if (back_edge != diamond_false->GetSingleSuccessor() || + back_edge != loop_info->GetBackEdges()[0]) { + return false; + } + + DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 5u); + return true; +} + +static bool IsPredicatedLoopControlFlowSupported(HLoopInformation* loop_info) { + size_t num_of_blocks = loop_info->GetBlocks().NumSetBits(); + return num_of_blocks == 2 || HasLoopDiamondStructure(loop_info); +} + // // Public methods. // @@ -482,6 +530,8 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_runtime_test_b_(nullptr), vector_map_(nullptr), vector_permanent_map_(nullptr), + vector_external_set_(nullptr), + predicate_info_map_(nullptr), vector_mode_(kSequential), vector_preheader_(nullptr), vector_header_(nullptr), @@ -542,12 +592,17 @@ bool HLoopOptimization::LocalRun() { std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); ScopedArenaSafeMap<HInstruction*, HInstruction*> perm( std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*> pred( + std::less<HBasicBlock*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); // Attach. iset_ = &iset; reductions_ = &reds; vector_refs_ = &refs; vector_map_ = ↦ vector_permanent_map_ = &perm; + vector_external_set_ = &ext_set; + predicate_info_map_ = &pred; // Traverse. const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_); // Detach. @@ -556,6 +611,9 @@ bool HLoopOptimization::LocalRun() { vector_refs_ = nullptr; vector_map_ = nullptr; vector_permanent_map_ = nullptr; + vector_external_set_ = nullptr; + predicate_info_map_ = nullptr; + return did_loop_opt; } @@ -787,6 +845,37 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } +// Checks whether the loop has exit structure suitable for InnerLoopFinite optimization: +// - has single loop exit. +// - the exit block has only single predecessor - a block inside the loop. +// +// In that case returns single exit basic block (outside the loop); otherwise nullptr. +static HBasicBlock* GetInnerLoopFiniteSingleExit(HLoopInformation* loop_info) { + HBasicBlock* exit = nullptr; + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + // Check whether one of the successor is loop exit. + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + if (exit != nullptr) { + // The loop has more than one exit. + return nullptr; + } + exit = successor; + + // Ensure exit can only be reached by exiting loop. 
+ if (successor->GetPredecessors().size() != 1) { + return nullptr; + } + } + } + } + return exit; +} + bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -795,33 +884,22 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { return false; } - // Ensure there is only a single loop-body (besides the header). - HBasicBlock* body = nullptr; - for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { - if (it.Current() != header) { - if (body != nullptr) { - return false; - } - body = it.Current(); - } - } - CHECK(body != nullptr); - // Ensure there is only a single exit point. - if (header->GetSuccessors().size() != 2) { - return false; - } - HBasicBlock* exit = (header->GetSuccessors()[0] == body) - ? header->GetSuccessors()[1] - : header->GetSuccessors()[0]; - // Ensure exit can only be reached by exiting loop. - if (exit->GetPredecessors().size() != 1) { + // Check loop exits. + HBasicBlock* exit = GetInnerLoopFiniteSingleExit(node->loop_info); + if (exit == nullptr) { return false; } + + HBasicBlock* body = (header->GetSuccessors()[0] == exit) + ? header->GetSuccessors()[1] + : header->GetSuccessors()[0]; // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. HPhi* main_phi = nullptr; - if (TrySetSimpleLoopHeader(header, &main_phi)) { + size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits(); + + if (num_of_blocks == 2 && TrySetSimpleLoopHeader(header, &main_phi)) { bool is_empty = IsEmptyBody(body); if (reductions_->empty() && // TODO: possible with some effort (is_empty || trip_count == 1) && @@ -845,21 +923,61 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } } // Vectorize loop, if possible and valid. - if (kEnableVectorization && + if (!kEnableVectorization || // Disable vectorization for debuggable graphs: this is a workaround for the bug // in 'GenerateNewLoop' which caused the SuspendCheck environment to be invalid. // TODO: b/138601207, investigate other possible cases with wrong environment values and // possibly switch back vectorization on for debuggable graphs. - !graph_->IsDebuggable() && - TrySetSimpleLoopHeader(header, &main_phi) && - ShouldVectorize(node, body, trip_count) && - TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { - Vectorize(node, body, exit, trip_count); - graph_->SetHasSIMD(true); // flag SIMD usage - MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); - return true; + graph_->IsDebuggable()) { + return false; + } + + if (IsInPredicatedVectorizationMode()) { + return TryVectorizePredicated(node, body, exit, main_phi, trip_count); + } else { + return TryVectorizedTraditional(node, body, exit, main_phi, trip_count); } - return false; +} + +bool HLoopOptimization::TryVectorizePredicated(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count) { + if (!IsPredicatedLoopControlFlowSupported(node->loop_info) || + !ShouldVectorizeCommon(node, main_phi, trip_count)) { + return false; + } + + // Currently we can only generate cleanup loops for loops with 2 basic block. 
+ // + // TODO: Support array disambiguation tests for CF loops. + if (NeedsArrayRefsDisambiguationTest() && + node->loop_info->GetBlocks().NumSetBits() != 2) { + return false; + } + + VectorizePredicated(node, body, exit); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + graph_->SetHasPredicatedSIMD(true); // flag SIMD usage + return true; +} + +bool HLoopOptimization::TryVectorizedTraditional(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count) { + HBasicBlock* header = node->loop_info->GetHeader(); + size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits(); + + if (num_of_blocks != 2 || !ShouldVectorizeCommon(node, main_phi, trip_count)) { + return false; + } + VectorizeTraditional(node, body, exit, trip_count); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + graph_->SetHasTraditionalSIMD(true); // flag SIMD usage + return true; } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { @@ -1006,7 +1124,10 @@ bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { // Intel Press, June, 2004 (http://www.aartbik.com/). // -bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) { + +bool HLoopOptimization::CanVectorizeDataFlow(LoopNode* node, + HBasicBlock* header, + bool collect_alignment_info) { // Reset vector bookkeeping. vector_length_ = 0; vector_refs_->clear(); @@ -1015,16 +1136,30 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 vector_runtime_test_a_ = vector_runtime_test_b_ = nullptr; - // Phis in the loop-body prevent vectorization. - if (!block->GetPhis().IsEmpty()) { - return false; - } + // Traverse the data flow of the loop, in the original program order. + for (HBlocksInLoopReversePostOrderIterator block_it(*header->GetLoopInformation()); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); - // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side - // occurrence, which allows passing down attributes down the use tree. - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { - return false; // failure to vectorize a left-hand-side + if (block == header) { + // The header is of a certain structure (TrySetSimpleLoopHeader) and doesn't need to be + // processed here. + continue; + } + + // Phis in the loop-body prevent vectorization. + // TODO: Enable vectorization of CF loops with Phis. + if (!block->GetPhis().IsEmpty()) { + return false; + } + + // Scan the loop-body instructions, starting a right-hand-side tree traversal at each + // left-hand-side occurrence, which allows passing down attributes down the use tree. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { + return false; // failure to vectorize a left-hand-side + } } } @@ -1111,24 +1246,123 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 } } // for i - if (!IsInPredicatedVectorizationMode()) { - // Find a suitable alignment strategy. + if (collect_alignment_info) { + // Update the info on alignment strategy. SetAlignmentStrategy(peeling_votes, peeling_candidate); } - // Does vectorization seem profitable? - if (!IsVectorizationProfitable(trip_count)) { + // Success! 
+ return true; +} + +bool HLoopOptimization::ShouldVectorizeCommon(LoopNode* node, + HPhi* main_phi, + int64_t trip_count) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + bool enable_alignment_strategies = !IsInPredicatedVectorizationMode(); + if (!TrySetSimpleLoopHeader(header, &main_phi) || + !CanVectorizeDataFlow(node, header, enable_alignment_strategies) || + !IsVectorizationProfitable(trip_count) || + !TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { return false; } - // Success! return true; } -void HLoopOptimization::Vectorize(LoopNode* node, - HBasicBlock* block, - HBasicBlock* exit, - int64_t trip_count) { +void HLoopOptimization::VectorizePredicated(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit) { + DCHECK(IsInPredicatedVectorizationMode()); + + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + // Adjust vector bookkeeping. + HPhi* main_phi = nullptr; + bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets + DCHECK(is_simple_loop_header); + vector_header_ = header; + vector_body_ = block; + + // Loop induction type. + DataType::Type induc_type = main_phi->GetType(); + DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64) + << induc_type; + + // Generate loop control: + // stc = <trip-count>; + // vtc = <vector trip-count> + HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); + HInstruction* vtc = stc; + vector_index_ = graph_->GetConstant(induc_type, 0); + bool needs_disambiguation_test = false; + // Generate runtime disambiguation test: + // vtc = a != b ? vtc : 0; + if (NeedsArrayRefsDisambiguationTest()) { + HInstruction* rt = Insert( + preheader, + new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); + vtc = Insert(preheader, + new (global_allocator_) + HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc)); + needs_disambiguation_test = true; + } + + // Generate vector loop: + // for ( ; i < vtc; i += vector_length) + // <vectorized-loop-body> + HBasicBlock* preheader_for_vector_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + vector_mode_ = kVector; + GenerateNewLoopPredicated(node, + preheader_for_vector_loop, + vector_index_, + vtc, + graph_->GetConstant(induc_type, vector_length_)); + + // Generate scalar loop, if needed: + // for ( ; i < stc; i += 1) + // <loop-body> + if (needs_disambiguation_test) { + vector_mode_ = kSequential; + HBasicBlock* preheader_for_cleanup_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + // Use "Traditional" version for the sequential loop. + GenerateNewLoopScalarOrTraditional(node, + preheader_for_cleanup_loop, + vector_index_, + stc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); + } + + FinalizeVectorization(node); + + // Assign governing predicates for the predicated instructions inserted during vectorization + // outside the loop. 
+ for (auto it : *vector_external_set_) { + DCHECK(it->IsVecOperation()); + HVecOperation* vec_op = it->AsVecOperation(); + + HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, + graph_->GetIntConstant(1), + vec_op->GetPackedType(), + vec_op->GetVectorLength(), + 0u); + vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op); + vec_op->SetMergingGoverningPredicate(set_pred); + } +} + +void HLoopOptimization::VectorizeTraditional(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit, + int64_t trip_count) { + DCHECK(!IsInPredicatedVectorizationMode()); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1141,7 +1375,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, // A cleanup loop is needed, at least, for any unknown trip count or // for a known trip count with remainder iterations after vectorization. - bool needs_cleanup = !IsInPredicatedVectorizationMode() && + bool needs_cleanup = (trip_count == 0 || ((trip_count - vector_static_peeling_factor_) % chunk) != 0); // Adjust vector bookkeeping. @@ -1160,13 +1394,11 @@ void HLoopOptimization::Vectorize(LoopNode* node, // ptc = <peeling factor>; HInstruction* ptc = nullptr; if (vector_static_peeling_factor_ != 0) { - DCHECK(!IsInPredicatedVectorizationMode()); // Static loop peeling for SIMD alignment (using the most suitable // fixed peeling factor found during prior alignment analysis). DCHECK(vector_dynamic_peeling_candidate_ == nullptr); ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_); } else if (vector_dynamic_peeling_candidate_ != nullptr) { - DCHECK(!IsInPredicatedVectorizationMode()); // Dynamic loop peeling for SIMD alignment (using the most suitable // candidate found during prior alignment analysis): // rem = offset % ALIGN; // adjusted as #elements @@ -1197,7 +1429,6 @@ void HLoopOptimization::Vectorize(LoopNode* node, HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); HInstruction* vtc = stc; if (needs_cleanup) { - DCHECK(!IsInPredicatedVectorizationMode()); DCHECK(IsPowerOfTwo(chunk)); HInstruction* diff = stc; if (ptc != nullptr) { @@ -1217,7 +1448,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, // Generate runtime disambiguation test: // vtc = a != b ? vtc : 0; - if (vector_runtime_test_a_ != nullptr) { + if (NeedsArrayRefsDisambiguationTest()) { HInstruction* rt = Insert( preheader, new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); @@ -1235,45 +1466,52 @@ void HLoopOptimization::Vectorize(LoopNode* node, // moved around during suspend checks, since all analysis was based on // nothing more than the Android runtime alignment conventions. 
if (ptc != nullptr) { - DCHECK(!IsInPredicatedVectorizationMode()); vector_mode_ = kSequential; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - ptc, - graph_->GetConstant(induc_type, 1), - LoopAnalysisInfo::kNoUnrollingFactor); + HBasicBlock* preheader_for_peeling_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_peeling_loop, + vector_index_, + ptc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: // for ( ; i < vtc; i += chunk) // <vectorized-loop-body> vector_mode_ = kVector; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - vtc, - graph_->GetConstant(induc_type, vector_length_), // increment per unroll - unroll); - HLoopInformation* vloop = vector_header_->GetLoopInformation(); + HBasicBlock* preheader_for_vector_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_vector_loop, + vector_index_, + vtc, + graph_->GetConstant(induc_type, vector_length_), // per unroll + unroll); // Generate cleanup loop, if needed: // for ( ; i < stc; i += 1) // <loop-body> if (needs_cleanup) { - DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), vector_runtime_test_a_ != nullptr); vector_mode_ = kSequential; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - stc, - graph_->GetConstant(induc_type, 1), - LoopAnalysisInfo::kNoUnrollingFactor); + HBasicBlock* preheader_for_cleanup_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_cleanup_loop, + vector_index_, + stc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); } + FinalizeVectorization(node); +} + +void HLoopOptimization::FinalizeVectorization(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + HLoopInformation* vloop = vector_header_->GetLoopInformation(); // Link reductions to their final uses. for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (i->first->IsPhi()) { @@ -1287,9 +1525,17 @@ void HLoopOptimization::Vectorize(LoopNode* node, } } - // Remove the original loop by disconnecting the body block - // and removing all instructions from the header. - block->DisconnectAndDelete(); + // Remove the original loop. 
+ for (HBlocksInLoopPostOrderIterator it_loop(*node->loop_info); + !it_loop.Done(); + it_loop.Advance()) { + HBasicBlock* cur_block = it_loop.Current(); + if (cur_block == node->loop_info->GetHeader()) { + continue; + } + cur_block->DisconnectAndDelete(); + } + while (!header->GetFirstInstruction()->IsGoto()) { header->RemoveInstruction(header->GetFirstInstruction()); } @@ -1301,14 +1547,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, node->loop_info = vloop; } -void HLoopOptimization::GenerateNewLoop(LoopNode* node, - HBasicBlock* block, - HBasicBlock* new_preheader, - HInstruction* lo, - HInstruction* hi, - HInstruction* step, - uint32_t unroll) { - DCHECK(unroll == 1 || vector_mode_ == kVector); +HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo) { DataType::Type induc_type = lo->GetType(); // Prepare new loop. vector_preheader_ = new_preheader, @@ -1318,68 +1557,160 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, kNoRegNumber, 0, HPhi::ToPhiType(induc_type)); - // Generate header and prepare body. - // for (i = lo; i < hi; i += step) - // <loop-body> - HInstruction* cond = nullptr; - HInstruction* set_pred = nullptr; - if (IsInPredicatedVectorizationMode()) { - HVecPredWhile* pred_while = - new (global_allocator_) HVecPredWhile(global_allocator_, - phi, - hi, - HVecPredWhile::CondKind::kLO, - DataType::Type::kInt32, - vector_length_, - 0u); - - cond = new (global_allocator_) HVecPredCondition(global_allocator_, - pred_while, - HVecPredCondition::PCondKind::kNFirst, - DataType::Type::kInt32, - vector_length_, - 0u); - - vector_header_->AddPhi(phi); - vector_header_->AddInstruction(pred_while); - vector_header_->AddInstruction(cond); - set_pred = pred_while; - } else { - cond = new (global_allocator_) HAboveOrEqual(phi, hi); - vector_header_->AddPhi(phi); - vector_header_->AddInstruction(cond); - } + vector_header_->AddPhi(phi); + vector_index_ = phi; + vector_permanent_map_->clear(); + vector_external_set_->clear(); + predicate_info_map_->clear(); + + return phi; +} +void HLoopOptimization::GenerateNewLoopScalarOrTraditional(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step, + uint32_t unroll) { + DCHECK(unroll == 1 || vector_mode_ == kVector); + DataType::Type induc_type = lo->GetType(); + HPhi* phi = InitializeForNewLoop(new_preheader, lo); + + // Generate loop exit check. + HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi); + vector_header_->AddInstruction(cond); vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); - vector_index_ = phi; - vector_permanent_map_->clear(); // preserved over unrolling + for (uint32_t u = 0; u < unroll; u++) { - // Generate instruction map. - vector_map_->clear(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + GenerateNewLoopBodyOnce(node, induc_type, step); + } + + FinalizePhisForNewLoop(phi, lo); +} + +void HLoopOptimization::GenerateNewLoopPredicated(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step) { + DCHECK(IsInPredicatedVectorizationMode()); + DCHECK_EQ(vector_mode_, kVector); + DataType::Type induc_type = lo->GetType(); + HPhi* phi = InitializeForNewLoop(new_preheader, lo); + + // Generate loop exit check. 
+ HVecPredWhile* pred_while = + new (global_allocator_) HVecPredWhile(global_allocator_, + phi, + hi, + HVecPredWhile::CondKind::kLO, + DataType::Type::kInt32, + vector_length_, + 0u); + + HInstruction* cond = + new (global_allocator_) HVecPredToBoolean(global_allocator_, + pred_while, + HVecPredToBoolean::PCondKind::kNFirst, + DataType::Type::kInt32, + vector_length_, + 0u); + + vector_header_->AddInstruction(pred_while); + vector_header_->AddInstruction(cond); + vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); + + PreparePredicateInfoMap(node); + GenerateNewLoopBodyOnce(node, induc_type, step); + InitPredicateInfoMap(node, pred_while); + + // Assign governing predicates for instructions in the loop; the traversal order doesn't matter. + for (HBlocksInLoopIterator block_it(*node->loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { + auto i = vector_map_->find(it.Current()); + if (i != vector_map_->end()) { + HInstruction* instr = i->second; + + if (!instr->IsVecOperation()) { + continue; + } + // There are cases when a vector instruction, which corresponds to some instruction in the + // original scalar loop, is located not in the newly created vector loop but + // in the vector loop preheader (and hence recorded in vector_external_set_). + // + // Governing predicates will be set for such instructions separately. + bool in_vector_loop = vector_header_->GetLoopInformation()->Contains(*instr->GetBlock()); + DCHECK_IMPLIES(!in_vector_loop, + vector_external_set_->find(instr) != vector_external_set_->end()); + + if (in_vector_loop && + !instr->AsVecOperation()->IsPredicated()) { + HVecOperation* op = instr->AsVecOperation(); + HVecPredSetOperation* pred = predicate_info_map_->Get(cur_block)->GetControlPredicate(); + op->SetMergingGoverningPredicate(pred); + } + } + } + } + + FinalizePhisForNewLoop(phi, lo); +} + +void HLoopOptimization::GenerateNewLoopBodyOnce(LoopNode* node, + DataType::Type induc_type, + HInstruction* step) { + // Generate instruction map. + vector_map_->clear(); + HLoopInformation* loop_info = node->loop_info; + + // Traverse the data flow of the loop, in the original program order. + for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + if (cur_block == loop_info->GetHeader()) { + continue; + } + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); DCHECK(vectorized_def); } - // Generate body from the instruction map, but in original program order. - HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + } + + // Generate body from the instruction map, in the original program order. 
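In the predicated variant there is no scalar cleanup loop: an SVE-style while-predicate decides per lane whether it is active, and the loop exits once the first lane goes inactive (HVecPredToBoolean kNFirst). The following is a standalone model of that behaviour in plain C++, not the ART API.

  #include <cstdint>
  #include <vector>

  void predicated_shape(int32_t* a, uint32_t lo, uint32_t hi, uint32_t vl) {
    std::vector<bool> pred(vl);
    for (uint32_t i = lo; ; i += vl) {
      for (uint32_t l = 0; l < vl; ++l) {
        pred[l] = (i + l) < hi;            // HVecPredWhile, CondKind::kLO (unsigned lower)
      }
      if (!pred[0]) {                      // HVecPredToBoolean kNFirst: exit when lane 0 is off
        break;
      }
      for (uint32_t l = 0; l < vl; ++l) {
        if (pred[l]) a[i + l] += 1;        // each HVec op merges under its governing predicate
      }
    }
  }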
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment(); + for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + if (cur_block == loop_info->GetHeader()) { + continue; + } + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { auto i = vector_map_->find(it.Current()); if (i != vector_map_->end() && !i->second->IsInBlock()) { Insert(vector_body_, i->second); - if (IsInPredicatedVectorizationMode() && i->second->IsVecOperation()) { - HVecOperation* op = i->second->AsVecOperation(); - op->SetMergingGoverningPredicate(set_pred); - } // Deal with instructions that need an environment, such as the scalar intrinsics. if (i->second->NeedsEnvironment()) { i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_); } } } - // Generate the induction. - vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); - Insert(vector_body_, vector_index_); } + // Generate the induction. + vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); + Insert(vector_body_, vector_index_); +} + +void HLoopOptimization::FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo) { // Finalize phi inputs for the reductions (if any). for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (!i->first->IsPhi()) { @@ -1442,10 +1773,13 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { + DCHECK(!instruction->IsPhi()); if (generate_code) { - HInstruction* new_red = vector_map_->Get(instruction); - vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); - vector_permanent_map_->Overwrite(redit->second, new_red); + HInstruction* new_red_vec_op = vector_map_->Get(instruction); + HInstruction* original_phi = redit->second; + DCHECK(original_phi->IsPhi()); + vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi)); + vector_permanent_map_->Overwrite(original_phi, new_red_vec_op); } return true; } @@ -1455,6 +1789,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, if (instruction->IsGoto()) { return true; } + + if (instruction->IsIf()) { + return VectorizeIfCondition(node, instruction, generate_code, restrictions); + } // Otherwise accept only expressions with no effects outside the immediate loop-body. // Note that actual uses are inspected during right-hand-side tree traversal. return !IsUsedOutsideLoop(node->loop_info, instruction) @@ -1485,9 +1823,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, // Deal with vector restrictions. bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt(); - if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) || - IsInPredicatedVectorizationMode())) { - // TODO: Support CharAt for predicated mode. + if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) { return false; } // Accept a right-hand-side array base[index] for @@ -1676,6 +2012,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that // ARM 32-bit always supports advanced SIMD (64-bit SIMD). 
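The Put/Overwrite bookkeeping in VectorizeDef above is what chains a reduction accumulator through successive unrolled body copies before FinalizePhisForNewLoop wires it back into the loop phi. An illustrative scalar analogue follows; the names are hypothetical and only the chaining pattern is taken from the change.

  #include <cstdint>

  int32_t reduction_unrolled(const int32_t* a, uint32_t n) {  // assumes n % 2 == 0
    int32_t acc = 0;               // initial phi input (start value in vector_permanent_map_)
    for (uint32_t i = 0; i < n; ) {
      acc = acc + a[i]; i += 1;    // copy #1 reads the phi (or the previous copy's result)
      acc = acc + a[i]; i += 1;    // Overwrite() makes copy #2 consume copy #1's result
    }
    return acc;                    // FinalizePhisForNewLoop links acc back into the phi
  }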
+ *restrictions |= kNoIfCond; switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -1701,6 +2038,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u); switch (type) { case DataType::Type::kBool: + *restrictions |= kNoDiv | + kNoSignedHAdd | + kNoUnsignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoIfCond; + return TrySetVectorLength(type, vector_length); case DataType::Type::kUint8: case DataType::Type::kInt8: *restrictions |= kNoDiv | @@ -1712,6 +2056,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kUint16: case DataType::Type::kInt16: *restrictions |= kNoDiv | + kNoStringCharAt | // TODO: support in predicated mode. kNoSignedHAdd | kNoUnsignedHAdd | kNoUnroundedHAdd | @@ -1722,13 +2067,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(type, vector_length); case DataType::Type::kInt64: - *restrictions |= kNoDiv | kNoSAD; + *restrictions |= kNoDiv | kNoSAD | kNoIfCond; return TrySetVectorLength(type, vector_length); case DataType::Type::kFloat32: - *restrictions |= kNoReduction; + *restrictions |= kNoReduction | kNoIfCond; return TrySetVectorLength(type, vector_length); case DataType::Type::kFloat64: - *restrictions |= kNoReduction; + *restrictions |= kNoReduction | kNoIfCond; return TrySetVectorLength(type, vector_length); default: break; @@ -1737,6 +2082,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict } else { // Allow vectorization for all ARM devices, because Android assumes that // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD). + *restrictions |= kNoIfCond; switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -1767,6 +2113,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case InstructionSet::kX86: case InstructionSet::kX86_64: // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD). 
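TrySetVectorType communicates per-ISA limitations as bits in a uint64_t; kNoIfCond (added as 1 << 14 in the header) is set on every path that cannot lower an if-condition, and VectorizeIfCondition later bails out via HasVectorRestrictions. A minimal sketch of that bitmask pattern; only the kNoSAD and kNoIfCond values are taken from this change, the helper and main() are illustrative.

  #include <cstdint>

  enum : uint64_t {
    kNoSAD    = 1u << 11,  // value as declared in loop_optimization.h
    kNoIfCond = 1u << 14,  // new flag: no if-condition conversion
  };

  static bool HasVectorRestriction(uint64_t restrictions, uint64_t bit) {
    return (restrictions & bit) != 0;
  }

  int main() {
    uint64_t restrictions = 0;
    restrictions |= kNoIfCond;  // e.g. what the ARM32/NEON paths now set unconditionally
    // Returns 0 when the restriction is present, i.e. HIf conversion must be rejected.
    return HasVectorRestriction(restrictions, kNoIfCond) ? 0 : 1;
  }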
+ *restrictions |= kNoIfCond; if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { switch (type) { case DataType::Type::kBool: @@ -1855,15 +2202,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) { vector = new (global_allocator_) HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc); vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length_, - 0u); - vector_preheader_->InsertInstructionBefore(set_pred, vector); - vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(vector); } vector_map_->Put(org, vector); } @@ -1936,18 +2275,18 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, vector_map_->Put(org, vector); } -void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { - DCHECK(reductions_->find(phi) != reductions_->end()); - DCHECK(reductions_->Get(phi->InputAt(1)) == phi); +void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) { + DCHECK(reductions_->find(orig_phi) != reductions_->end()); + DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi); HInstruction* vector = nullptr; if (vector_mode_ == kSequential) { HPhi* new_phi = new (global_allocator_) HPhi( - global_allocator_, kNoRegNumber, 0, phi->GetType()); + global_allocator_, kNoRegNumber, 0, orig_phi->GetType()); vector_header_->AddPhi(new_phi); vector = new_phi; } else { // Link vector reduction back to prior unrolled update, or a first phi. - auto it = vector_permanent_map_->find(phi); + auto it = vector_permanent_map_->find(orig_phi); if (it != vector_permanent_map_->end()) { vector = it->second; } else { @@ -1957,7 +2296,7 @@ void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { vector = new_phi; } } - vector_map_->Put(phi, vector); + vector_map_->Put(orig_phi, vector); } void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) { @@ -1992,15 +2331,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r vector_length, kNoDexPc)); } - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length, - 0u); - vector_preheader_->InsertInstructionBefore(set_pred, new_init); - new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(new_init); } else { new_init = ReduceAndExtractIfNeeded(new_init); } @@ -2026,23 +2357,15 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct // x = REDUCE( [x_1, .., x_n] ) // y = x_1 // along the exit of the defining loop. 
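ReduceAndExtractIfNeeded (the code just below) materialises the scalar result of a vector reduction at the loop exit: a horizontal HVecReduce followed by HVecExtractScalar of lane 0, both now recorded in vector_external_set_. A standalone sketch of the scalar meaning, with a plain array standing in for the vector register that holds the partial sums:

  #include <cstdint>

  // x = REDUCE([x_1, .., x_n]); y = x_1
  int32_t reduce_and_extract(const int32_t lanes[], uint32_t vl) {
    int32_t reduced = 0;
    for (uint32_t l = 0; l < vl; ++l) {
      reduced += lanes[l];         // HVecReduce (the kSum kind shown; kMin/kMax are analogous)
    }
    return reduced;                // HVecExtractScalar reads lane 0 of the reduced vector
  }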
- HInstruction* reduce = new (global_allocator_) HVecReduce( + HVecReduce* reduce = new (global_allocator_) HVecReduce( global_allocator_, instruction, type, vector_length, kind, kNoDexPc); exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); + vector_external_set_->insert(reduce); instruction = new (global_allocator_) HVecExtractScalar( global_allocator_, reduce, type, vector_length, 0, kNoDexPc); exit->InsertInstructionAfter(instruction, reduce); - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length, - 0u); - exit->InsertInstructionBefore(set_pred, reduce); - reduce->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(instruction); } } return instruction; @@ -2057,10 +2380,10 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct } \ break; -void HLoopOptimization::GenerateVecOp(HInstruction* org, - HInstruction* opa, - HInstruction* opb, - DataType::Type type) { +HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + DataType::Type type) { uint32_t dex_pc = org->GetDexPc(); HInstruction* vector = nullptr; DataType::Type org_type = org->GetType(); @@ -2130,11 +2453,23 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), new (global_allocator_) HAbs(org_type, opa, dex_pc)); + case HInstruction::kEqual: { + // Special case. + if (vector_mode_ == kVector) { + vector = new (global_allocator_) HVecCondition( + global_allocator_, opa, opb, type, vector_length_, dex_pc); + } else { + DCHECK(vector_mode_ == kSequential); + UNREACHABLE(); + } + } + break; default: break; } // switch CHECK(vector != nullptr) << "Unsupported SIMD operator"; vector_map_->Put(org, vector); + return vector; } #undef GENERATE_VEC @@ -2374,6 +2709,89 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, return false; } +bool HLoopOptimization::VectorizeIfCondition(LoopNode* node, + HInstruction* hif, + bool generate_code, + uint64_t restrictions) { + DCHECK(hif->IsIf()); + HInstruction* if_input = hif->InputAt(0); + + if (!if_input->HasOnlyOneNonEnvironmentUse()) { + // Avoid the complications of the condition used as materialized boolean. + return false; + } + + if (!if_input->IsEqual()) { + // TODO: Support other condition types. + return false; + } + + HCondition* cond = if_input->AsCondition(); + HInstruction* opa = cond->InputAt(0); + HInstruction* opb = cond->InputAt(1); + DataType::Type type = GetNarrowerType(opa, opb); + + if (!DataType::IsIntegralType(type)) { + return false; + } + + bool is_unsigned = false; + HInstruction* opa_promoted = opa; + HInstruction* opb_promoted = opb; + bool is_int_case = DataType::Type::kInt32 == opa->GetType() && + DataType::Type::kInt32 == opb->GetType(); + + // Condition arguments should be either both int32 or consistently extended signed/unsigned + // narrower operands. + if (!is_int_case && + !IsNarrowerOperands(opa, opb, type, &opa_promoted, &opb_promoted, &is_unsigned)) { + return false; + } + type = HVecOperation::ToProperType(type, is_unsigned); + + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. 
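VectorizeIfCondition (continued in the following hunks, together with InitPredicateInfoMap's diamond handling) is the CF->DF conversion in action: the comparison becomes an HVecCondition producing the true-path predicate, HVecPredNot gives the false-path predicate, and the diamond's blocks execute under those predicates instead of branching. Below is a hedged per-lane model of a loop body containing a single if-diamond; it illustrates the semantics only, not the HIR that is actually built.

  #include <cstdint>

  // for (i) { if (a[i] == b[i]) { then-body } else { else-body } }  after CF->DF conversion:
  // both bodies are executed for every lane, each masked by its governing predicate.
  void if_diamond_predicated(const int32_t* a, const int32_t* b, int32_t* out,
                             uint32_t i, uint32_t vl) {
    for (uint32_t l = 0; l < vl; ++l) {
      bool p_true  = (a[i + l] == b[i + l]);  // HVecCondition (true-path predicate)
      bool p_false = !p_true;                 // HVecPredNot   (false-path predicate)
      if (p_true)  out[i + l] = 1;            // diamond_true block, governed by p_true
      if (p_false) out[i + l] = 0;            // diamond_false block, governed by p_false
    }
  }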
+ if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } + + if (!TrySetVectorType(type, &restrictions) || + HasVectorRestrictions(restrictions, kNoIfCond)) { + return false; + } + + if (generate_code && vector_mode_ != kVector) { // de-idiom + opa_promoted = opa; + opb_promoted = opb; + } + + if (VectorizeUse(node, opa_promoted, generate_code, type, restrictions) && + VectorizeUse(node, opb_promoted, generate_code, type, restrictions)) { + if (generate_code) { + HInstruction* vec_cond = GenerateVecOp(cond, + vector_map_->Get(opa_promoted), + vector_map_->Get(opb_promoted), + type); + + if (vector_mode_ == kVector) { + HInstruction* vec_pred_not = new (global_allocator_) HVecPredNot( + global_allocator_, vec_cond, type, vector_length_, hif->GetDexPc()); + + vector_map_->Put(hif, vec_pred_not); + BlockPredicateInfo* pred_info = predicate_info_map_->Get(hif->GetBlock()); + pred_info->SetControlFlowInfo(vec_cond->AsVecPredSetOperation(), + vec_pred_not->AsVecPredSetOperation()); + } else { + DCHECK(vector_mode_ == kSequential); + UNREACHABLE(); + } + } + return true; + } + + return false; +} + // // Vectorization heuristics. // @@ -2423,6 +2841,8 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { // TODO: trip count is really unsigned entity, provided the guarding test // is satisfied; deal with this more carefully later uint32_t max_peel = MaxNumberPeeled(); + // Peeling is not supported in predicated mode. + DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), max_peel == 0u); if (vector_length_ == 0) { return false; // nothing found } else if (trip_count < 0) { @@ -2686,4 +3106,67 @@ bool HLoopOptimization::CanRemoveCycle() { return true; } +void HLoopOptimization::PreparePredicateInfoMap(LoopNode* node) { + HLoopInformation* loop_info = node->loop_info; + + DCHECK(IsPredicatedLoopControlFlowSupported(loop_info)); + + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + BlockPredicateInfo* pred_info = new (loop_allocator_) BlockPredicateInfo(); + + predicate_info_map_->Put(cur_block, pred_info); + } +} + +void HLoopOptimization::InitPredicateInfoMap(LoopNode* node, + HVecPredSetOperation* loop_main_pred) { + HLoopInformation* loop_info = node->loop_info; + HBasicBlock* header = loop_info->GetHeader(); + BlockPredicateInfo* header_info = predicate_info_map_->Get(header); + // Loop header is a special case; it doesn't have a false predicate because we + // would just exit the loop then. + header_info->SetControlFlowInfo(loop_main_pred, loop_main_pred); + + size_t blocks_in_loop = header->GetLoopInformation()->GetBlocks().NumSetBits(); + if (blocks_in_loop == 2) { + for (HBasicBlock* successor : header->GetSuccessors()) { + if (loop_info->Contains(*successor)) { + // This is loop second block - body. + BlockPredicateInfo* body_info = predicate_info_map_->Get(successor); + body_info->SetControlPredicate(loop_main_pred); + return; + } + } + UNREACHABLE(); + } + + // TODO: support predicated vectorization of CF loop of more complex structure. + DCHECK(HasLoopDiamondStructure(loop_info)); + HBasicBlock* header_succ_0 = header->GetSuccessors()[0]; + HBasicBlock* header_succ_1 = header->GetSuccessors()[1]; + HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ? 
+ header_succ_0 : + header_succ_1; + + HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf(); + HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor(); + HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor(); + HBasicBlock* back_edge = diamond_true->GetSingleSuccessor(); + + BlockPredicateInfo* diamond_top_info = predicate_info_map_->Get(diamond_top); + BlockPredicateInfo* diamond_true_info = predicate_info_map_->Get(diamond_true); + BlockPredicateInfo* diamond_false_info = predicate_info_map_->Get(diamond_false); + BlockPredicateInfo* back_edge_info = predicate_info_map_->Get(back_edge); + + diamond_top_info->SetControlPredicate(header_info->GetTruePredicate()); + + diamond_true_info->SetControlPredicate(diamond_top_info->GetTruePredicate()); + diamond_false_info->SetControlPredicate(diamond_top_info->GetFalsePredicate()); + + back_edge_info->SetControlPredicate(header_info->GetTruePredicate()); +} + } // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 6dd778ba74..86a9f0fcb8 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -101,6 +101,7 @@ class HLoopOptimization : public HOptimization { kNoSAD = 1 << 11, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening kNoDotProd = 1 << 13, // no dot product + kNoIfCond = 1 << 14, // no if condition conversion }; /* @@ -136,6 +137,95 @@ class HLoopOptimization : public HOptimization { bool is_string_char_at; // compressed string read }; + // This structure describes the control flow (CF) -> data flow (DF) conversion of the loop + // with control flow (see below) for the purpose of predicated autovectorization. + // + // Lets define "loops without control-flow" (or non-CF loops) as loops with two consecutive + // blocks and without the branching structure except for the loop exit. And + // "loop with control-flow" (or CF-loops) - all other loops. + // + // In the execution of the original CF-loop on each iteration some basic block Y will be + // either executed or not executed, depending on the control flow of the loop. More + // specifically, a block will be executed if all the conditional branches of the nodes in + // the control dependency graph for that block Y are taken according to the path from the loop + // header to that basic block. + // + // This is the key idea of CF->DF conversion: a boolean value + // 'ctrl_pred == cond1 && cond2 && ...' will determine whether the basic block Y will be + // executed, where cond_K is whether the branch of the node K in the control dependency + // graph upward traversal was taken in the 'right' direction. + // + // Def.: BB Y is control dependent on BB X iff + // (1) there exists a directed path P from X to Y with any basic block Z in P (excluding X + // and Y) post-dominated by Y and + // (2) X is not post-dominated by Y. + // ... + // X + // false / \ true + // / \ + // ... + // | + // Y + // ... + // + // When doing predicated autovectorization of a CF loop, we use the CF->DF conversion approach: + // 1) do the data analysis and vector operation creation as if it was a non-CF loop. + // 2) for each HIf block create two vector predicate setting instructions - for True and False + // edges/paths. 
+ // 3) assign a governing vector predicate (see comments near HVecPredSetOperation) + // to each vector operation Alpha in the loop (including to those vector predicate setting + // instructions created in #2); do this by: + // - finding the immediate control dependent block of the instruction Alpha's block. + // - choosing the True or False predicate setting instruction (created in #2) depending + // on the path to the instruction. + // + // For more information check the papers: + // + // - Allen, John R and Kennedy, Ken and Porterfield, Carrie and Warren, Joe, + // “Conversion of Control Dependence to Data Dependence,” in Proceedings of the 10th ACM + // SIGACT-SIGPLAN Symposium on Principles of Programming Languages, 1983, pp. 177–189. + // - JEANNE FERRANTE, KARL J. OTTENSTEIN, JOE D. WARREN, + // "The Program Dependence Graph and Its Use in Optimization" + // + class BlockPredicateInfo : public ArenaObject<kArenaAllocLoopOptimization> { + public: + BlockPredicateInfo() : + control_predicate_(nullptr), + true_predicate_(nullptr), + false_predicate_(nullptr) {} + + void SetControlFlowInfo(HVecPredSetOperation* true_predicate, + HVecPredSetOperation* false_predicate) { + DCHECK(!HasControlFlowOps()); + true_predicate_ = true_predicate; + false_predicate_ = false_predicate; + } + + bool HasControlFlowOps() const { + // Note: a block must have both T/F predicates set or none of them. + DCHECK_EQ(true_predicate_ == nullptr, false_predicate_ == nullptr); + return true_predicate_ != nullptr; + } + + HVecPredSetOperation* GetControlPredicate() const { return control_predicate_; } + void SetControlPredicate(HVecPredSetOperation* control_predicate) { + control_predicate_ = control_predicate; + } + + HVecPredSetOperation* GetTruePredicate() const { return true_predicate_; } + HVecPredSetOperation* GetFalsePredicate() const { return false_predicate_; } + + private: + // Vector control predicate operation, associated with the block which will determine + // the active lanes for all vector operations, originated from this block. + HVecPredSetOperation* control_predicate_; + + // Vector predicate instruction, associated with the true sucessor of the block. + HVecPredSetOperation* true_predicate_; + // Vector predicate instruction, associated with the false sucessor of the block. + HVecPredSetOperation* false_predicate_; + }; + // // Loop setup and traversal. // @@ -203,15 +293,95 @@ class HLoopOptimization : public HOptimization { // Vectorization analysis and synthesis. // - bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); - void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); - void GenerateNewLoop(LoopNode* node, - HBasicBlock* block, - HBasicBlock* new_preheader, - HInstruction* lo, - HInstruction* hi, - HInstruction* step, - uint32_t unroll); + // Returns whether the data flow requirements are met for vectorization. + // + // - checks whether instructions are vectorizable for the target. + // - conducts data dependence analysis for array references. + // - additionally, collects info on peeling and aligment strategy. + bool CanVectorizeDataFlow(LoopNode* node, HBasicBlock* header, bool collect_alignment_info); + + // Does the checks (common for predicated and traditional mode) for the loop. + bool ShouldVectorizeCommon(LoopNode* node, HPhi* main_phi, int64_t trip_count); + + // Try to vectorize the loop, returns whether it was successful. 
+ // + // There are two versions/algorithms: + // - Predicated: all the vector operations have governing predicates which control + // which individual vector lanes will be active (see HVecPredSetOperation for more details). + // Example: vectorization using AArch64 SVE. + // - Traditional: a regular mode in which all vector operations lanes are unconditionally + // active. + // Example: vectoriation using AArch64 NEON. + bool TryVectorizePredicated(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count); + + bool TryVectorizedTraditional(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count); + + // Vectorizes the loop for which all checks have been already done. + void VectorizePredicated(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit); + void VectorizeTraditional(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit, + int64_t trip_count); + + // Performs final steps for whole vectorization process: links reduction, removes the original + // scalar loop, updates loop info. + void FinalizeVectorization(LoopNode* node); + + // Helpers that do the vector instruction synthesis for the previously created loop; create + // and fill the loop body with instructions. + // + // A version to generate a vector loop in predicated mode. + void GenerateNewLoopPredicated(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step); + + // A version to generate a vector loop in traditional mode or to generate + // a scalar loop for both modes. + void GenerateNewLoopScalarOrTraditional(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step, + uint32_t unroll); + + // + // Helpers for GenerateNewLoop*. + // + + // Updates vectorization bookkeeping date for the new loop, creates and returns + // its main induction Phi. + HPhi* InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo); + + // Finalizes reduction and induction phis' inputs for the newly created loop. + void FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo); + + // Creates empty predicate info object for each basic block and puts it into the map. + void PreparePredicateInfoMap(LoopNode* node); + + // Set up block true/false predicates using info, collected through data flow and control + // dependency analysis. + void InitPredicateInfoMap(LoopNode* node, HVecPredSetOperation* loop_main_pred); + + // Performs instruction synthesis for the loop body. + void GenerateNewLoopBodyOnce(LoopNode* node, + DataType::Type induc_type, + HInstruction* step); + + // Returns whether the vector loop needs runtime disambiguation test for array refs. + bool NeedsArrayRefsDisambiguationTest() const { return vector_runtime_test_a_ != nullptr; } + bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); bool VectorizeUse(LoopNode* node, HInstruction* instruction, @@ -239,10 +409,10 @@ class HLoopOptimization : public HOptimization { void GenerateVecReductionPhi(HPhi* phi); void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction); HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction); - void GenerateVecOp(HInstruction* org, - HInstruction* opa, - HInstruction* opb, - DataType::Type type); + HInstruction* GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + DataType::Type type); // Vectorization idioms. 
bool VectorizeSaturationIdiom(LoopNode* node, @@ -265,6 +435,10 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeIfCondition(LoopNode* node, + HInstruction* instruction, + bool generate_code, + uint64_t restrictions); // Vectorization heuristics. Alignment ComputeAlignment(HInstruction* offset, @@ -369,6 +543,16 @@ class HLoopOptimization : public HOptimization { // Contents reside in phase-local heap memory. ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_; + // Tracks vector operations that are inserted outside of the loop (preheader, exit) + // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops + // for loop reductions). + ScopedArenaSet<HInstruction*>* vector_external_set_; + + // A mapping between a basic block of the original loop and its associated PredicateInfo. + // + // Only used in predicated loop vectorization mode. + ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*>* predicate_info_map_; + // Temporary vectorization bookkeeping. VectorMode vector_mode_; // synthesis mode HBasicBlock* vector_preheader_; // preheader of the new loop diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index 7f694fb655..49e3c0418f 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -30,6 +30,7 @@ namespace art HIDDEN { class LoopOptimizationTest : public OptimizingUnitTest { protected: void SetUp() override { + TEST_SETUP_DISABLED_FOR_RISCV64(); OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); @@ -44,6 +45,7 @@ class LoopOptimizationTest : public OptimizingUnitTest { } void TearDown() override { + TEST_TEARDOWN_DISABLED_FOR_RISCV64(); codegen_.reset(); compiler_options_.reset(); graph_ = nullptr; @@ -134,17 +136,20 @@ class LoopOptimizationTest : public OptimizingUnitTest { // TEST_F(LoopOptimizationTest, NoLoops) { + TEST_DISABLED_FOR_RISCV64(); PerformAnalysis(); EXPECT_EQ("", LoopStructure()); } TEST_F(LoopOptimizationTest, SingleLoop) { + TEST_DISABLED_FOR_RISCV64(); AddLoop(entry_block_, return_block_); PerformAnalysis(); EXPECT_EQ("[]", LoopStructure()); } TEST_F(LoopOptimizationTest, LoopNest10) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -156,6 +161,7 @@ TEST_F(LoopOptimizationTest, LoopNest10) { } TEST_F(LoopOptimizationTest, LoopSequence10) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -167,6 +173,7 @@ TEST_F(LoopOptimizationTest, LoopSequence10) { } TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -194,6 +201,7 @@ TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { } TEST_F(LoopOptimizationTest, LoopNestWithSequence) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -215,6 +223,7 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) { // // This is a test for nodes.cc functionality - HGraph::SimplifyLoop. TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { + TEST_DISABLED_FOR_RISCV64(); // Can't use AddLoop as we want special order for blocks predecessors. 
HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_); @@ -260,6 +269,7 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { // // This is a test for nodes.cc functionality - HGraph::SimplifyLoop. TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* header = AddLoop(entry_block_, return_block_); header->InsertInstructionBefore( diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 3790058879..5795ea7ca9 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -36,6 +36,7 @@ #include "code_generator.h" #include "common_dominator.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "mirror/class-inl.h" #include "scoped_thread_state_change-inl.h" #include "ssa_builder.h" @@ -1488,12 +1489,12 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, const HInstruction* instruction2) const { DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock()); for (HInstructionIterator it(*this); !it.Done(); it.Advance()) { - if (it.Current() == instruction1) { - return true; - } if (it.Current() == instruction2) { return false; } + if (it.Current() == instruction1) { + return true; + } } LOG(FATAL) << "Did not find an order between two instructions of the same block."; UNREACHABLE(); @@ -1815,10 +1816,12 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } -HConstant* HTypeConversion::TryStaticEvaluation() const { - HGraph* graph = GetBlock()->GetGraph(); - if (GetInput()->IsIntConstant()) { - int32_t value = GetInput()->AsIntConstant()->GetValue(); +HConstant* HTypeConversion::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); } + +HConstant* HTypeConversion::TryStaticEvaluation(HInstruction* input) const { + HGraph* graph = input->GetBlock()->GetGraph(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt8: return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); @@ -1837,8 +1840,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsLongConstant()) { - int64_t value = GetInput()->AsLongConstant()->GetValue(); + } else if (input->IsLongConstant()) { + int64_t value = input->AsLongConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt8: return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); @@ -1857,8 +1860,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsFloatConstant()) { - float value = GetInput()->AsFloatConstant()->GetValue(); + } else if (input->IsFloatConstant()) { + float value = input->AsFloatConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt32: if (std::isnan(value)) @@ -1881,8 +1884,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsDoubleConstant()) { - double value = GetInput()->AsDoubleConstant()->GetValue(); + } else if (input->IsDoubleConstant()) { + double value = input->AsDoubleConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt32: if (std::isnan(value)) @@ -1909,41 +1912,47 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { return nullptr; } -HConstant* HUnaryOperation::TryStaticEvaluation() const { - if (GetInput()->IsIntConstant()) { - return 
Evaluate(GetInput()->AsIntConstant()); - } else if (GetInput()->IsLongConstant()) { - return Evaluate(GetInput()->AsLongConstant()); +HConstant* HUnaryOperation::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); } + +HConstant* HUnaryOperation::TryStaticEvaluation(HInstruction* input) const { + if (input->IsIntConstant()) { + return Evaluate(input->AsIntConstant()); + } else if (input->IsLongConstant()) { + return Evaluate(input->AsLongConstant()); } else if (kEnableFloatingPointStaticEvaluation) { - if (GetInput()->IsFloatConstant()) { - return Evaluate(GetInput()->AsFloatConstant()); - } else if (GetInput()->IsDoubleConstant()) { - return Evaluate(GetInput()->AsDoubleConstant()); + if (input->IsFloatConstant()) { + return Evaluate(input->AsFloatConstant()); + } else if (input->IsDoubleConstant()) { + return Evaluate(input->AsDoubleConstant()); } } return nullptr; } HConstant* HBinaryOperation::TryStaticEvaluation() const { - if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); - } else if (GetLeft()->IsLongConstant()) { - if (GetRight()->IsIntConstant()) { + return TryStaticEvaluation(GetLeft(), GetRight()); +} + +HConstant* HBinaryOperation::TryStaticEvaluation(HInstruction* left, HInstruction* right) const { + if (left->IsIntConstant() && right->IsIntConstant()) { + return Evaluate(left->AsIntConstant(), right->AsIntConstant()); + } else if (left->IsLongConstant()) { + if (right->IsIntConstant()) { // The binop(long, int) case is only valid for shifts and rotations. DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName(); - return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant()); - } else if (GetRight()->IsLongConstant()) { - return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); + return Evaluate(left->AsLongConstant(), right->AsIntConstant()); + } else if (right->IsLongConstant()) { + return Evaluate(left->AsLongConstant(), right->AsLongConstant()); } - } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) { + } else if (left->IsNullConstant() && right->IsNullConstant()) { // The binop(null, null) case is only valid for equal and not-equal conditions. 
DCHECK(IsEqual() || IsNotEqual()) << DebugName(); - return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant()); + return Evaluate(left->AsNullConstant(), right->AsNullConstant()); } else if (kEnableFloatingPointStaticEvaluation) { - if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) { - return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant()); - } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) { - return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant()); + if (left->IsFloatConstant() && right->IsFloatConstant()) { + return Evaluate(left->AsFloatConstant(), right->AsFloatConstant()); + } else if (left->IsDoubleConstant() && right->IsDoubleConstant()) { + return Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant()); } } return nullptr; @@ -2797,8 +2806,11 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasMonitorOperations()) { outer_graph->SetHasMonitorOperations(true); } - if (HasSIMD()) { - outer_graph->SetHasSIMD(true); + if (HasTraditionalSIMD()) { + outer_graph->SetHasTraditionalSIMD(true); + } + if (HasPredicatedSIMD()) { + outer_graph->SetHasPredicatedSIMD(true); } if (HasAlwaysThrowingInvokes()) { outer_graph->SetHasAlwaysThrowingInvokes(true); @@ -3026,9 +3038,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { replacement = outer_graph->GetDoubleConstant( current->AsDoubleConstant()->GetValue(), current->GetDexPc()); } else if (current->IsParameterValue()) { - if (kIsDebugBuild - && invoke->IsInvokeStaticOrDirect() - && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { + if (kIsDebugBuild && + invoke->IsInvokeStaticOrDirect() && + invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { // Ensure we do not use the last input of `invoke`, as it // contains a clinit check which is not an actual argument. size_t last_input_index = invoke->InputCount() - 1; @@ -3125,6 +3137,8 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { new_pre_header, old_pre_header, /* replace_if_back_edge= */ false); } +// Creates a new two-basic-block loop and inserts it between original loop header and +// original loop exit; also adjusts dominators, post order and new LoopInformation. HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HBasicBlock* body, HBasicBlock* exit) { @@ -3518,9 +3532,7 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { static_assert( \ static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ "Instrinsics enumeration space overflow."); -#include "intrinsics_list.h" - INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) #undef CHECK_INTRINSICS_ENUM_VALUES // Function that returns whether an intrinsic needs an environment or not. @@ -3531,9 +3543,7 @@ static inline IntrinsicNeedsEnvironment NeedsEnvironmentIntrinsic(Intrinsics i) #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) 
\ case Intrinsics::k ## Name: \ return NeedsEnv; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kNeedsEnvironment; @@ -3547,9 +3557,7 @@ static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) { #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return SideEffects; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kAllSideEffects; @@ -3563,9 +3571,7 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return Exceptions; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kCanThrow; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 28112d176a..9cf52183b8 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -403,7 +403,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_bounds_checks_(false), has_try_catch_(false), has_monitor_operations_(false), - has_simd_(false), + has_traditional_simd_(false), + has_predicated_simd_(false), has_loops_(false), has_irreducible_loops_(false), has_direct_critical_native_call_(false), @@ -708,8 +709,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasMonitorOperations() const { return has_monitor_operations_; } void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; } - bool HasSIMD() const { return has_simd_; } - void SetHasSIMD(bool value) { has_simd_ = value; } + bool HasTraditionalSIMD() { return has_traditional_simd_; } + void SetHasTraditionalSIMD(bool value) { has_traditional_simd_ = value; } + + bool HasPredicatedSIMD() { return has_predicated_simd_; } + void SetHasPredicatedSIMD(bool value) { has_predicated_simd_ = value; } + + bool HasSIMD() const { return has_traditional_simd_ || has_predicated_simd_; } bool HasLoops() const { return has_loops_; } void SetHasLoops(bool value) { has_loops_ = value; } @@ -822,10 +828,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // DexRegisterMap to be present to allow deadlock analysis for non-debuggable code. bool has_monitor_operations_; - // Flag whether SIMD instructions appear in the graph. If true, the - // code generators may have to be more careful spilling the wider + // Flags whether SIMD (traditional or predicated) instructions appear in the graph. + // If either is true, the code generators may have to be more careful spilling the wider // contents of SIMD registers. - bool has_simd_; + bool has_traditional_simd_; + bool has_predicated_simd_; // Flag whether there are any loops in the graph. We can skip loop // optimization if it's false. 
@@ -1636,7 +1643,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecStore, VecMemoryOperation) \ M(VecPredSetAll, VecPredSetOperation) \ M(VecPredWhile, VecPredSetOperation) \ - M(VecPredCondition, VecOperation) \ + M(VecPredToBoolean, VecOperation) \ + M(VecCondition, VecPredSetOperation) \ + M(VecPredNot, VecPredSetOperation) \ #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \ @@ -1659,6 +1668,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) +#define FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(M) + #ifndef ART_ENABLE_CODEGEN_x86 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) #else @@ -1715,7 +1726,7 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) const char* DebugName() const override { return #type; } \ HInstruction* Clone(ArenaAllocator* arena) const override { \ DCHECK(IsClonable()); \ - return new (arena) H##type(*this->As##type()); \ + return new (arena) H##type(*this); \ } \ void Accept(HGraphVisitor* visitor) override @@ -2062,12 +2073,12 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { ArtMethod* method, uint32_t dex_pc, HInstruction* holder) - : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), - parent_(nullptr), - method_(method), - dex_pc_(dex_pc), - holder_(holder) { + : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), + locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), + parent_(nullptr), + method_(method), + dex_pc_(dex_pc), + holder_(holder) { } ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator, @@ -2183,9 +2194,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { std::ostream& operator<<(std::ostream& os, const HInstruction& rhs); // Iterates over the Environments -class HEnvironmentIterator : public ValueObject, - public std::iterator<std::forward_iterator_tag, HEnvironment*> { +class HEnvironmentIterator : public ValueObject { public: + using iterator_category = std::forward_iterator_tag; + using value_type = HEnvironment*; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = void; + explicit HEnvironmentIterator(HEnvironment* cur) : cur_(cur) {} HEnvironment* operator*() const { @@ -2355,9 +2371,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return true; } - virtual bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const { - return false; - } + virtual bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const { return false; } // If this instruction will do an implicit null check, return the `HNullCheck` associated // with it. Otherwise return null. @@ -2553,7 +2567,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { #define INSTRUCTION_TYPE_CAST(type, super) \ const H##type* As##type() const; \ - H##type* As##type(); + H##type* As##type(); \ + const H##type* As##type##OrNull() const; \ + H##type* As##type##OrNull(); FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) #undef INSTRUCTION_TYPE_CAST @@ -2568,7 +2584,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // // Note: HEnvironment and some other fields are not copied and are set to default values, see // 'explicit HInstruction(const HInstruction& other)' for details. 
- virtual HInstruction* Clone(ArenaAllocator* arena ATTRIBUTE_UNUSED) const { + virtual HInstruction* Clone([[maybe_unused]] ArenaAllocator* arena) const { LOG(FATAL) << "Cloning is not implemented for the instruction " << DebugName() << " " << GetId(); UNREACHABLE(); @@ -2596,7 +2612,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Returns whether any data encoded in the two instructions is equal. // This method does not look at the inputs. Both instructions must be // of the same type, otherwise the method has undefined behavior. - virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const { + virtual bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const { return false; } @@ -2729,7 +2745,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { private: using InstructionKindField = - BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; + BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) { auto before_use_node = uses_.before_begin(); @@ -2904,9 +2920,14 @@ class HBackwardInstructionIterator : public ValueObject { }; template <typename InnerIter> -struct HSTLInstructionIterator : public ValueObject, - public std::iterator<std::forward_iterator_tag, HInstruction*> { +struct HSTLInstructionIterator : public ValueObject { public: + using iterator_category = std::forward_iterator_tag; + using value_type = HInstruction*; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = void; + static_assert(std::is_same_v<InnerIter, HBackwardInstructionIterator> || std::is_same_v<InnerIter, HInstructionIterator> || std::is_same_v<InnerIter, HInstructionIteratorHandleChanges>, @@ -3164,7 +3185,7 @@ class HPhi final : public HVariableInputSizeInstruction { bool IsVRegEquivalentOf(const HInstruction* other) const { return other != nullptr && other->IsPhi() - && other->AsPhi()->GetBlock() == GetBlock() + && other->GetBlock() == GetBlock() && other->AsPhi()->GetRegNumber() == GetRegNumber(); } @@ -3270,7 +3291,7 @@ class HConstant : public HExpression<0> { class HNullConstant final : public HConstant { public: - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3639,7 +3660,8 @@ class HDeoptimize final : public HVariableInputSizeInstruction { bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); } bool InstructionDataEquals(const HInstruction* other) const override { - return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); + return (other->CanBeMoved() == CanBeMoved()) && + (other->AsDeoptimize()->GetDeoptimizationKind() == GetDeoptimizationKind()); } bool NeedsEnvironment() const override { return true; } @@ -3827,7 +3849,7 @@ class HUnaryOperation : public HExpression<1> { DataType::Type GetResultType() const { return GetType(); } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3836,6 +3858,9 @@ class HUnaryOperation : public HExpression<1> { // be evaluated as a constant, return null. 
HConstant* TryStaticEvaluation() const; + // Same but for `input` instead of GetInput(). + HConstant* TryStaticEvaluation(HInstruction* input) const; + // Apply this operation to `x`. virtual HConstant* Evaluate(HIntConstant* x) const = 0; virtual HConstant* Evaluate(HLongConstant* x) const = 0; @@ -3903,7 +3928,7 @@ class HBinaryOperation : public HExpression<2> { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3912,16 +3937,19 @@ class HBinaryOperation : public HExpression<2> { // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; + // Same but for `left` and `right` instead of GetLeft() and GetRight(). + HConstant* TryStaticEvaluation(HInstruction* left, HInstruction* right) const; + // Apply this operation to `x` and `y`. - virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const { + virtual HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const { LOG(FATAL) << DebugName() << " is not defined for the (null, null) case."; UNREACHABLE(); } virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0; virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0; - virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED, - HIntConstant* y ATTRIBUTE_UNUSED) const { + virtual HConstant* Evaluate([[maybe_unused]] HLongConstant* x, + [[maybe_unused]] HIntConstant* y) const { LOG(FATAL) << DebugName() << " is not defined for the (long, int) case."; UNREACHABLE(); } @@ -4049,8 +4077,8 @@ class HEqual final : public HCondition { bool IsCommutative() const override { return true; } - HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const override { return MakeConstantCondition(true, GetDexPc()); } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { @@ -4096,8 +4124,8 @@ class HNotEqual final : public HCondition { bool IsCommutative() const override { return true; } - HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const override { return MakeConstantCondition(false, GetDexPc()); } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { @@ -4303,13 +4331,13 @@ class HBelow final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4345,13 +4373,13 @@ class HBelowOrEqual final : 
public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4387,13 +4415,13 @@ class HAbove final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4429,13 +4457,13 @@ class HAboveOrEqual final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4522,7 +4550,7 @@ class HCompare final : public HBinaryOperation { return GetBias() == ComparisonBias::kGtBias; } - static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type type ATTRIBUTE_UNUSED) { + static SideEffects SideEffectsForArchRuntimeCalls([[maybe_unused]] DataType::Type type) { // Comparisons do not require a runtime call in any back end. return SideEffects::None(); } @@ -4859,8 +4887,7 @@ class HInvokePolymorphic final : public HInvoke { // to pass intrinsic information to the HInvokePolymorphic node. 
ArtMethod* resolved_method, MethodReference resolved_method_reference, - dex::ProtoIndex proto_idx, - bool enable_intrinsic_opt) + dex::ProtoIndex proto_idx) : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, @@ -4871,9 +4898,8 @@ class HInvokePolymorphic final : public HInvoke { resolved_method, resolved_method_reference, kPolymorphic, - enable_intrinsic_opt), - proto_idx_(proto_idx) { - } + /* enable_intrinsic_opt= */ true), + proto_idx_(proto_idx) {} bool IsClonable() const override { return true; } @@ -5015,7 +5041,7 @@ class HInvokeStaticOrDirect final : public HInvoke { return input_records; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // We do not access the method via object reference, so we cannot do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. return false; @@ -5599,10 +5625,14 @@ class HMin final : public HBinaryOperation { ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } // TODO: Evaluation for floating-point values. - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { + return nullptr; + } + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { + return nullptr; + } DECLARE_INSTRUCTION(Min); @@ -5634,10 +5664,14 @@ class HMax final : public HBinaryOperation { ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } // TODO: Evaluation for floating-point values. 
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { + return nullptr; + } + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { + return nullptr; + } DECLARE_INSTRUCTION(Max); @@ -5699,7 +5733,7 @@ class HDivZeroCheck final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -5736,18 +5770,18 @@ class HShl final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5782,18 +5816,18 @@ class HShr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5830,18 +5864,18 @@ class HUShr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - 
HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5873,13 +5907,13 @@ class HAnd final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5911,13 +5945,13 @@ class HOr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5949,13 +5983,13 @@ class HXor final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5993,18 +6027,18 @@ class HRor final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), 
kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6067,7 +6101,7 @@ class HNot final : public HUnaryOperation { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6079,11 +6113,11 @@ class HNot final : public HUnaryOperation { HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6101,7 +6135,7 @@ class HBooleanNot final : public HUnaryOperation { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6113,15 +6147,15 @@ class HBooleanNot final : public HUnaryOperation { HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for long values"; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6148,7 +6182,7 @@ class HTypeConversion final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool 
InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } // Return whether the conversion is implicit. This includes conversion to the same type. @@ -6160,6 +6194,9 @@ class HTypeConversion final : public HExpression<1> { // containing the result. If the input cannot be converted, return nullptr. HConstant* TryStaticEvaluation() const; + // Same but for `input` instead of GetInput(). + HConstant* TryStaticEvaluation(HInstruction* input) const; + DECLARE_INSTRUCTION(TypeConversion); protected: @@ -6180,7 +6217,7 @@ class HNullCheck final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6511,12 +6548,12 @@ class HArrayGet final : public HExpression<2> { HInstruction* index, DataType::Type type, uint32_t dex_pc) - : HArrayGet(array, - index, - type, - SideEffects::ArrayReadOfType(type), - dex_pc, - /* is_string_char_at= */ false) { + : HArrayGet(array, + index, + type, + SideEffects::ArrayReadOfType(type), + dex_pc, + /* is_string_char_at= */ false) { } HArrayGet(HInstruction* array, @@ -6533,10 +6570,10 @@ class HArrayGet final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // TODO: We can be smarter here. // Currently, unless the array is the result of NewArray, the array access is always // preceded by some form of null NullCheck necessary for the bounds check, usually @@ -6640,7 +6677,7 @@ class HArraySet final : public HExpression<3> { // Can throw ArrayStoreException. bool CanThrow() const override { return NeedsTypeCheck(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // TODO: Same as for ArrayGet. 
return false; } @@ -6746,7 +6783,7 @@ class HArrayLength final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { @@ -6790,7 +6827,7 @@ class HBoundsCheck final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -7000,17 +7037,15 @@ class HLoadClass final : public HInstruction { bool CanCallRuntime() const { return NeedsAccessCheck() || MustGenerateClinitCheck() || - GetLoadKind() == LoadKind::kRuntimeCall || - GetLoadKind() == LoadKind::kBssEntry; + NeedsBss() || + GetLoadKind() == LoadKind::kRuntimeCall; } bool CanThrow() const override { return NeedsAccessCheck() || MustGenerateClinitCheck() || // If the class is in the boot image, the lookup in the runtime call cannot throw. - ((GetLoadKind() == LoadKind::kRuntimeCall || - GetLoadKind() == LoadKind::kBssEntry) && - !IsInBootImage()); + ((GetLoadKind() == LoadKind::kRuntimeCall || NeedsBss()) && !IsInBootImage()); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -7423,7 +7458,7 @@ class HClinitCheck final : public HExpression<1> { } // TODO: Make ClinitCheck clonable. bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -8343,7 +8378,7 @@ class HSelect final : public HExpression<3> { HInstruction* GetCondition() const { return InputAt(2); } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -8351,6 +8386,12 @@ class HSelect final : public HExpression<3> { return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull(); } + void UpdateType() { + DCHECK_EQ(HPhi::ToPhiType(GetTrueValue()->GetType()), + HPhi::ToPhiType(GetFalseValue()->GetType())); + SetPackedField<TypeField>(HPhi::ToPhiType(GetTrueValue()->GetType())); + } + DECLARE_INSTRUCTION(Select); protected: @@ -8513,7 +8554,7 @@ class HIntermediateAddress final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool IsActualObject() const override { return false; } @@ -8550,7 +8591,7 @@ class HGraphVisitor : public ValueObject { graph_(graph) {} virtual ~HGraphVisitor() {} - virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {} + virtual void VisitInstruction([[maybe_unused]] HInstruction* instruction) {} virtual void VisitBasicBlock(HBasicBlock* block); // Visit the graph following basic block insertion order. 
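Most of the churn in the nodes.h hunks above is the mechanical move from the toolchain-specific ATTRIBUTE_UNUSED macro to the standard C++17 [[maybe_unused]] attribute on parameters that an override deliberately ignores. A minimal, self-contained sketch of the pattern, using hypothetical Base/Derived types rather than the real HInstruction hierarchy:

#include <iostream>

// Hypothetical stand-ins for an interface whose overrides do not need every parameter.
struct Base {
  virtual ~Base() = default;
  virtual bool Equals([[maybe_unused]] const Base* other) const { return true; }
};

struct Derived : Base {
  // [[maybe_unused]] is the portable C++17 way to silence -Wunused-parameter,
  // replacing the compiler-specific ATTRIBUTE_UNUSED macro.
  bool Equals([[maybe_unused]] const Base* other) const override { return true; }
};

int main() {
  Derived d;
  std::cout << std::boolalpha << d.Equals(nullptr) << '\n';  // prints "true"
  return 0;
}

Unlike the macro, the attribute is understood by any conforming compiler, so no per-toolchain definition is needed.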
@@ -8623,7 +8664,7 @@ class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor { DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor); }; -// Iterator over the blocks that art part of the loop. Includes blocks part +// Iterator over the blocks that are part of the loop; includes blocks which are part // of an inner loop. The order in which the blocks are iterated is on their // block id. class HBlocksInLoopIterator : public ValueObject { @@ -8656,7 +8697,7 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; -// Iterator over the blocks that art part of the loop. Includes blocks part +// Iterator over the blocks that are part of the loop; includes blocks which are part // of an inner loop. The order in which the blocks are iterated is reverse // post order. class HBlocksInLoopReversePostOrderIterator : public ValueObject { @@ -8689,6 +8730,39 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); }; +// Iterator over the blocks that are part of the loop; includes blocks which are part +// of an inner loop. The order in which the blocks are iterated is post order. +class HBlocksInLoopPostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopPostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(blocks_.size() - 1) { + if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ < 0; } + HBasicBlock* Current() const { return blocks_[index_]; } + void Advance() { + --index_; + for (; index_ >= 0; --index_) { + if (blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const ArenaVector<HBasicBlock*>& blocks_; + + int32_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopPostOrderIterator); +}; + // Returns int64_t value of a properly typed constant. inline int64_t Int64FromConstant(HConstant* constant) { if (constant->IsIntConstant()) { @@ -8752,10 +8826,18 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { #define INSTRUCTION_TYPE_CAST(type, super) \ inline const H##type* HInstruction::As##type() const { \ - return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + DCHECK(Is##type()); \ + return down_cast<const H##type*>(this); \ } \ inline H##type* HInstruction::As##type() { \ - return Is##type() ? static_cast<H##type*>(this) : nullptr; \ + DCHECK(Is##type()); \ + return down_cast<H##type*>(this); \ + } \ + inline const H##type* HInstruction::As##type##OrNull() const { \ + return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + } \ + inline H##type* HInstruction::As##type##OrNull() { \ + return Is##type() ? 
down_cast<H##type*>(this) : nullptr; \ } FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 27e610328f..4b0187d536 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -105,13 +105,13 @@ class HBitwiseNegatedRight final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -160,7 +160,7 @@ class HIntermediateAddressIndex final : public HExpression<3> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool IsActualObject() const override { return false; } diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 73f6c40a0d..6a60d6be01 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1384,8 +1384,8 @@ class HVecPredWhile final : public HVecPredSetOperation { static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kCondKindSize = MinimumBitsToStore(static_cast<size_t>(CondKind::kLast)); - static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize; - static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits, + static constexpr size_t kNumberOfVecPredWhilePackedBits = kCondKind + kCondKindSize; + static_assert(kNumberOfVecPredWhilePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using CondKindField = BitField<CondKind, kCondKind, kCondKindSize>; @@ -1395,13 +1395,13 @@ class HVecPredWhile final : public HVecPredSetOperation { // Evaluates the predicate condition (PCondKind) for a vector predicate; outputs // a scalar boolean value result. // -// Note: as VecPredCondition can be also predicated, only active elements (determined by the +// Note: as VecPredToBoolean can be also predicated, only active elements (determined by the // instruction's governing predicate) of the input vector predicate are used for condition // evaluation. // // Note: this instruction is currently used as a workaround for the fact that IR instructions // can't have more than one output. -class HVecPredCondition final : public HVecOperation { +class HVecPredToBoolean final : public HVecOperation { public: // To get more info on the condition kinds please see "2.2 Process state, PSTATE" section of // "ARM Architecture Reference Manual Supplement. 
The Scalable Vector Extension (SVE), @@ -1418,13 +1418,13 @@ class HVecPredCondition final : public HVecOperation { kEnumLast = kPLast }; - HVecPredCondition(ArenaAllocator* allocator, + HVecPredToBoolean(ArenaAllocator* allocator, HInstruction* input, PCondKind pred_cond, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(kVecPredCondition, + : HVecOperation(kVecPredToBoolean, allocator, packed_type, SideEffects::None(), @@ -1447,19 +1447,86 @@ class HVecPredCondition final : public HVecOperation { return GetPackedField<CondKindField>(); } - DECLARE_INSTRUCTION(VecPredCondition); + DECLARE_INSTRUCTION(VecPredToBoolean); protected: // Additional packed bits. static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kCondKindSize = MinimumBitsToStore(static_cast<size_t>(PCondKind::kEnumLast)); - static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize; - static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits, + static constexpr size_t kNumberOfVecPredToBooleanPackedBits = kCondKind + kCondKindSize; + static_assert(kNumberOfVecPredToBooleanPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using CondKindField = BitField<PCondKind, kCondKind, kCondKindSize>; - DEFAULT_COPY_CONSTRUCTOR(VecPredCondition); + DEFAULT_COPY_CONSTRUCTOR(VecPredToBoolean); +}; + +// Evaluates condition for pairwise elements in two input vectors and sets the result +// as an output predicate vector. +// +// viz. [ p1, .. , pn ] = [ x1 OP y1 , x2 OP y2, .. , xn OP yn] where OP is CondKind +// condition. +// +// Currently only kEqual is supported by this vector instruction - we don't even define +// the kCondType here. +// TODO: support other condition ops. +class HVecCondition final : public HVecPredSetOperation { + public: + HVecCondition(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) : + HVecPredSetOperation(kVecCondition, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 2, + vector_length, + dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK(!left->IsVecPredSetOperation()); + DCHECK(right->IsVecOperation()); + DCHECK(!right->IsVecPredSetOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + DECLARE_INSTRUCTION(VecCondition); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecCondition); +}; + +// Inverts every component in the predicate vector. +// +// viz. [ p1, .. , pn ] = [ !px1 , !px2 , .. , !pxn ]. 
+class HVecPredNot final : public HVecPredSetOperation { + public: + HVecPredNot(ArenaAllocator* allocator, + HInstruction* input, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) : + HVecPredSetOperation(kVecPredNot, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 1, + vector_length, + dex_pc) { + DCHECK(input->IsVecOperation()); + DCHECK(input->IsVecPredSetOperation()); + + SetRawInputAt(0, input); + } + + DECLARE_INSTRUCTION(VecPredNot); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecPredNot); }; } // namespace art diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index e246390aa5..14d9823355 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -149,13 +149,13 @@ class HX86AndNot final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -196,11 +196,11 @@ class HX86MaskOrResetLeastSetBit final : public HUnaryOperation { HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << "is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << "is not defined for double values"; UNREACHABLE(); } diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 12e9a1046d..4f20b55c7e 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -313,8 +313,8 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; case OptimizationPass::kInstructionSimplifierX86: - opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); - break; + opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); + break; #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case OptimizationPass::kInstructionSimplifierX86_64: diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index f12e748941..632c32a70b 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -89,7 +89,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { void Finish() { code_gen_->GenerateFrameExit(); - code_gen_->Finalize(&code_allocator_); + code_gen_->Finalize(); } void Check(InstructionSet isa, @@ -97,7 +97,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const 
std::vector<uint8_t>& expected_cfi) { // Get the outputs. - ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_gen_->GetCode(); Assembler* opt_asm = code_gen_->GetAssembler(); ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); @@ -123,27 +123,9 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { } private: - class InternalCodeAllocator : public CodeAllocator { - public: - InternalCodeAllocator() {} - - uint8_t* Allocate(size_t size) override { - memory_.resize(size); - return memory_.data(); - } - - ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } - - private: - std::vector<uint8_t> memory_; - - DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); - }; - HGraph* graph_; std::unique_ptr<CodeGenerator> code_gen_; ArenaVector<HBasicBlock*> blocks_; - InternalCodeAllocator code_allocator_; }; #define TEST_ISA(isa) \ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 00eb6e5c42..040c2449a7 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -69,28 +69,6 @@ static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB; static constexpr const char* kPassNameSeparator = "$"; /** - * Used by the code generator, to allocate the code in a vector. - */ -class CodeVectorAllocator final : public CodeAllocator { - public: - explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} - - uint8_t* Allocate(size_t size) override { - memory_.resize(size); - return &memory_[0]; - } - - ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } - uint8_t* GetData() { return memory_.data(); } - - private: - ArenaVector<uint8_t> memory_; - - DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); -}; - -/** * Filter to apply to the visualizer. Methods whose name contain that filter will * be dumped. */ @@ -361,7 +339,6 @@ class OptimizingCompiler final : public Compiler { // Create a 'CompiledMethod' for an optimized graph. CompiledMethod* Emit(ArenaAllocator* allocator, - CodeVectorAllocator* code_allocator, CodeGenerator* codegen, bool is_intrinsic, const dex::CodeItem* item) const; @@ -372,10 +349,8 @@ class OptimizingCompiler final : public Compiler { // 1) Builds the graph. Returns null if it failed to build it. // 2) Transforms the graph to SSA. Returns null if it failed. // 3) Runs optimizations on the graph, including register allocator. - // 4) Generates code with the `code_allocator` provided. 
CodeGenerator* TryCompile(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, CompilationKind compilation_kind, @@ -383,7 +358,6 @@ class OptimizingCompiler final : public Compiler { CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, VariableSizedHandleScope* handles) const; @@ -440,24 +414,33 @@ void OptimizingCompiler::DumpInstructionSetFeaturesToCfg() const { std::string isa_string = std::string("isa:") + GetInstructionSetString(features->GetInstructionSet()); std::string features_string = "isa_features:" + features->GetFeatureString(); + std::string read_barrier_type = "none"; + if (gUseReadBarrier) { + if (art::kUseBakerReadBarrier) + read_barrier_type = "baker"; + else if (art::kUseTableLookupReadBarrier) + read_barrier_type = "tablelookup"; + } + std::string read_barrier_string = ART_FORMAT("read_barrier_type:{}", read_barrier_type); // It is assumed that visualizer_output_ is empty when calling this function, hence the fake // compilation block containing the ISA features will be printed at the beginning of the .cfg // file. - *visualizer_output_ - << HGraphVisualizer::InsertMetaDataAsCompilationBlock(isa_string + ' ' + features_string); + *visualizer_output_ << HGraphVisualizer::InsertMetaDataAsCompilationBlock( + isa_string + ' ' + features_string + ' ' + read_barrier_string); } -bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, - const DexFile& dex_file ATTRIBUTE_UNUSED) const { +bool OptimizingCompiler::CanCompileMethod([[maybe_unused]] uint32_t method_idx, + [[maybe_unused]] const DexFile& dex_file) const { return true; } static bool IsInstructionSetSupported(InstructionSet instruction_set) { - return instruction_set == InstructionSet::kArm - || instruction_set == InstructionSet::kArm64 - || instruction_set == InstructionSet::kThumb2 - || instruction_set == InstructionSet::kX86 - || instruction_set == InstructionSet::kX86_64; + return instruction_set == InstructionSet::kArm || + instruction_set == InstructionSet::kArm64 || + instruction_set == InstructionSet::kThumb2 || + instruction_set == InstructionSet::kRiscv64 || + instruction_set == InstructionSet::kX86 || + instruction_set == InstructionSet::kX86_64; } bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, @@ -469,7 +452,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, case InstructionSet::kThumb2: case InstructionSet::kArm: { OptimizationDef arm_optimizations[] = { - OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), + OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), }; return RunOptimizations(graph, codegen, @@ -481,7 +464,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { - OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kPcRelativeFixupsX86), }; return RunOptimizations(graph, codegen, @@ -508,11 +491,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, case InstructionSet::kThumb2: case InstructionSet::kArm: { OptimizationDef arm_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierArm), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - 
OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), - OptDef(OptimizationPass::kScheduling) + OptDef(OptimizationPass::kInstructionSimplifierArm), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), + OptDef(OptimizationPass::kScheduling) }; return RunOptimizations(graph, codegen, @@ -524,10 +507,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { OptimizationDef arm64_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierArm64), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kScheduling) + OptDef(OptimizationPass::kInstructionSimplifierArm64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kScheduling) }; return RunOptimizations(graph, codegen, @@ -539,11 +522,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierX86), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kPcRelativeFixupsX86), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kInstructionSimplifierX86), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -555,10 +538,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { OptimizationDef x86_64_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierX86_64), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kInstructionSimplifierX86_64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -633,68 +616,68 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, } OptimizationDef optimizations[] = { - // Initial optimizations. - OptDef(OptimizationPass::kConstantFolding), - OptDef(OptimizationPass::kInstructionSimplifier), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$initial"), - // Inlining. - OptDef(OptimizationPass::kInliner), - // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing"). - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_inlining", - OptimizationPass::kInliner), - OptDef(OptimizationPass::kInstructionSimplifier, - "instruction_simplifier$after_inlining", - OptimizationPass::kInliner), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_inlining", - OptimizationPass::kInliner), - // GVN. 
- OptDef(OptimizationPass::kSideEffectsAnalysis, - "side_effects$before_gvn"), - OptDef(OptimizationPass::kGlobalValueNumbering), - // Simplification (TODO: only if GVN occurred). - OptDef(OptimizationPass::kSelectGenerator), - OptDef(OptimizationPass::kAggressiveConstantFolding, - "constant_folding$after_gvn"), - OptDef(OptimizationPass::kInstructionSimplifier, - "instruction_simplifier$after_gvn"), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_gvn"), - // High-level optimizations. - OptDef(OptimizationPass::kSideEffectsAnalysis, - "side_effects$before_licm"), - OptDef(OptimizationPass::kInvariantCodeMotion), - OptDef(OptimizationPass::kInductionVarAnalysis), - OptDef(OptimizationPass::kBoundsCheckElimination), - OptDef(OptimizationPass::kLoopOptimization), - // Simplification. - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_loop_opt"), - OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$after_loop_opt"), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_loop_opt"), - // Other high-level optimizations. - OptDef(OptimizationPass::kLoadStoreElimination), - OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kCodeSinking), - // Simplification. - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$before_codegen"), - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$before_codegen"), - // Simplification may result in dead code that should be removed prior to - // code generation. - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$before_codegen"), - // Eliminate constructor fences after code sinking to avoid - // complicated sinking logic to split a fence with many inputs. - OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) + // Initial optimizations. + OptDef(OptimizationPass::kConstantFolding), + OptDef(OptimizationPass::kInstructionSimplifier), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$initial"), + // Inlining. + OptDef(OptimizationPass::kInliner), + // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing"). + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_inlining", + OptimizationPass::kInliner), + // GVN. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_gvn"), + OptDef(OptimizationPass::kGlobalValueNumbering), + // Simplification (TODO: only if GVN occurred). + OptDef(OptimizationPass::kSelectGenerator), + OptDef(OptimizationPass::kAggressiveConstantFolding, + "constant_folding$after_gvn"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_gvn"), + // High-level optimizations. 
+ OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_licm"), + OptDef(OptimizationPass::kInvariantCodeMotion), + OptDef(OptimizationPass::kInductionVarAnalysis), + OptDef(OptimizationPass::kBoundsCheckElimination), + OptDef(OptimizationPass::kLoopOptimization), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_loop_opt"), + OptDef(OptimizationPass::kAggressiveInstructionSimplifier, + "instruction_simplifier$after_loop_opt"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_loop_opt"), + // Other high-level optimizations. + OptDef(OptimizationPass::kLoadStoreElimination), + OptDef(OptimizationPass::kCHAGuardOptimization), + OptDef(OptimizationPass::kCodeSinking), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$before_codegen"), + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + OptDef(OptimizationPass::kAggressiveInstructionSimplifier, + "instruction_simplifier$before_codegen"), + // Simplification may result in dead code that should be removed prior to + // code generation. + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$before_codegen"), + // Eliminate constructor fences after code sinking to avoid + // complicated sinking logic to split a fence with many inputs. + OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) }; RunOptimizations(graph, codegen, @@ -719,7 +702,6 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* } CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, - CodeVectorAllocator* code_allocator, CodeGenerator* codegen, bool is_intrinsic, const dex::CodeItem* code_item_for_osr_check) const { @@ -729,7 +711,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CompiledCodeStorage* storage = GetCompiledCodeStorage(); CompiledMethod* compiled_method = storage->CreateCompiledMethod( codegen->GetInstructionSet(), - code_allocator->GetMemory(), + codegen->GetCode(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches), @@ -747,9 +729,92 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, return compiled_method; } +// TODO(riscv64): Remove this check when codegen is complete. +#ifdef ART_ENABLE_CODEGEN_riscv64 +static bool CanAssembleGraphForRiscv64(HGraph* graph) { + for (HBasicBlock* block : graph->GetPostOrder()) { + // Phis are implemented (and they have no code to emit), so check only non-Phi instructions. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + switch (it.Current()->GetKind()) { + case HInstruction::kParallelMove: + // ParallelMove is supported but it is inserted by the register allocator + // and this check is done before register allocation. 
+ LOG(FATAL) << "Unexpected ParallelMove before register allocation!"; + UNREACHABLE(); + case HInstruction::kExit: + case HInstruction::kGoto: + case HInstruction::kParameterValue: + case HInstruction::kReturn: + case HInstruction::kReturnVoid: + case HInstruction::kSuspendCheck: + case HInstruction::kDoubleConstant: + case HInstruction::kFloatConstant: + case HInstruction::kIntConstant: + case HInstruction::kLongConstant: + case HInstruction::kNullConstant: + case HInstruction::kLoadClass: + case HInstruction::kLoadString: + case HInstruction::kLoadMethodHandle: + case HInstruction::kLoadMethodType: + case HInstruction::kInstanceFieldGet: + case HInstruction::kStaticFieldGet: + case HInstruction::kArrayGet: + case HInstruction::kAbove: + case HInstruction::kAboveOrEqual: + case HInstruction::kBelow: + case HInstruction::kBelowOrEqual: + case HInstruction::kEqual: + case HInstruction::kGreaterThan: + case HInstruction::kGreaterThanOrEqual: + case HInstruction::kLessThan: + case HInstruction::kLessThanOrEqual: + case HInstruction::kNotEqual: + case HInstruction::kCompare: + case HInstruction::kIf: + case HInstruction::kAdd: + case HInstruction::kAnd: + case HInstruction::kOr: + case HInstruction::kSub: + case HInstruction::kXor: + case HInstruction::kRor: + case HInstruction::kShl: + case HInstruction::kShr: + case HInstruction::kUShr: + case HInstruction::kAbs: + case HInstruction::kBooleanNot: + case HInstruction::kMul: + case HInstruction::kNeg: + case HInstruction::kNot: + case HInstruction::kMin: + case HInstruction::kMax: + case HInstruction::kInvokeVirtual: + case HInstruction::kInvokeInterface: + case HInstruction::kCurrentMethod: + case HInstruction::kNullCheck: + break; + case HInstruction::kInvokeStaticOrDirect: + if (it.Current()->AsInvokeStaticOrDirect()->GetCodePtrLocation() == + CodePtrLocation::kCallCriticalNative && + it.Current()->AsInvokeStaticOrDirect()->GetNumberOfArguments() >= 8u) { + // TODO(riscv64): If there are more than 8 FP args, some may be passed in GPRs + // and this requires a `CriticalNativeAbiFixupRiscv64` pass similar to the one + // we have for ARM. This is not yet implemented. For simplicity, we reject all + // direct @CriticalNative calls with more than 8 args. + return false; + } + break; + default: + // Unimplemented instruction. + return false; + } + } + } + return true; +} +#endif + CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, CompilationKind compilation_kind, @@ -906,6 +971,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } + // TODO(riscv64): Remove this check when codegen is complete. 
+#ifdef ART_ENABLE_CODEGEN_riscv64 + if (instruction_set == InstructionSet::kRiscv64 && !CanAssembleGraphForRiscv64(graph)) { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledUnsupportedIsa); + return nullptr; + } +#endif + RegisterAllocator::Strategy regalloc_strategy = compiler_options.GetRegisterAllocationStrategy(); AllocateRegisters(graph, @@ -914,7 +988,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, regalloc_strategy, compilation_stats_.get()); - codegen->Compile(code_allocator); + codegen->Compile(); pass_observer.DumpDisassembly(); MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode); @@ -924,7 +998,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, VariableSizedHandleScope* handles) const { @@ -986,9 +1059,9 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( } OptimizationDef optimizations[] = { - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. - OptDef(OptimizationPass::kInstructionSimplifier), + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. + OptDef(OptimizationPass::kInstructionSimplifier), }; RunOptimizations(graph, codegen.get(), @@ -1002,6 +1075,15 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } + // TODO(riscv64): Remove this check when codegen is complete. +#ifdef ART_ENABLE_CODEGEN_riscv64 + if (instruction_set == InstructionSet::kRiscv64 && !CanAssembleGraphForRiscv64(graph)) { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledUnsupportedIsa); + return nullptr; + } +#endif + AllocateRegisters(graph, codegen.get(), &pass_observer, @@ -1013,7 +1095,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( return nullptr; } - codegen->Compile(code_allocator); + codegen->Compile(); pass_observer.DumpDisassembly(); VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic() @@ -1037,7 +1119,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, DCHECK(runtime->IsAotCompiler()); ArenaAllocator allocator(runtime->GetArenaPool()); ArenaStack arena_stack(runtime->GetArenaPool()); - CodeVectorAllocator code_allocator(&allocator); std::unique_ptr<CodeGenerator> codegen; bool compiled_intrinsic = false; { @@ -1071,7 +1152,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, codegen.reset( TryCompileIntrinsic(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, &handles)); @@ -1083,7 +1163,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, codegen.reset( TryCompile(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, compiler_options.IsBaseline() @@ -1094,7 +1173,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, } if (codegen.get() != nullptr) { compiled_method = Emit(&allocator, - &code_allocator, codegen.get(), compiled_intrinsic, compiled_intrinsic ? 
nullptr : code_item); @@ -1115,7 +1193,9 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, if (kIsDebugBuild && compiler_options.CompileArtTest() && - IsInstructionSetSupported(compiler_options.GetInstructionSet())) { + IsInstructionSetSupported(compiler_options.GetInstructionSet()) && + // TODO(riscv64): Enable this check when codegen is complete. + compiler_options.GetInstructionSet() != InstructionSet::kRiscv64) { // For testing purposes, we put a special marker on method names // that should be compiled with this compiler (when the // instruction set is supported). This makes sure we're not @@ -1177,19 +1257,16 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, /*verified_method=*/ nullptr, dex_cache, compiling_class); - CodeVectorAllocator code_allocator(&allocator); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative); std::unique_ptr<CodeGenerator> codegen( TryCompileIntrinsic(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, &handles)); if (codegen != nullptr) { return Emit(&allocator, - &code_allocator, codegen.get(), /*is_intrinsic=*/ true, /*item=*/ nullptr); @@ -1221,7 +1298,7 @@ Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, return new OptimizingCompiler(compiler_options, storage); } -bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { +bool EncodeArtMethodInInlineInfo([[maybe_unused]] ArtMethod* method) { // Note: the runtime is null only for unit testing. return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler(); } @@ -1328,7 +1405,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, debug_info, /* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(), compilation_kind, - /* has_should_deoptimize_flag= */ false, cha_single_implementation_list)) { code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); return false; @@ -1342,7 +1418,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, } ArenaStack arena_stack(runtime->GetJitArenaPool()); - CodeVectorAllocator code_allocator(&allocator); VariableSizedHandleScope handles(self); std::unique_ptr<CodeGenerator> codegen; @@ -1365,7 +1440,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen.reset( TryCompile(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, compilation_kind, @@ -1381,7 +1455,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArrayRef<const uint8_t> reserved_data; if (!code_cache->Reserve(self, region, - code_allocator.GetMemory().size(), + codegen->GetAssembler()->CodeSize(), stack_map.size(), /*number_of_roots=*/codegen->GetNumberOfJitRoots(), method, @@ -1394,7 +1468,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, const uint8_t* roots_data = reserved_data.data(); std::vector<Handle<mirror::Object>> roots; - codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots); + codegen->EmitJitRoots(const_cast<uint8_t*>(codegen->GetAssembler()->CodeBufferBaseAddress()), + roots_data, + &roots); // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope. 
DCHECK(std::all_of(roots.begin(), roots.end(), @@ -1418,7 +1494,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = reinterpret_cast<uintptr_t>(code); - info.code_size = code_allocator.GetMemory().size(); + info.code_size = codegen->GetAssembler()->CodeSize(), info.frame_size_in_bytes = codegen->GetFrameSize(); info.code_info = stack_map.size() == 0 ? nullptr : stack_map.data(); info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); @@ -1429,22 +1505,23 @@ bool OptimizingCompiler::JitCompile(Thread* self, region, method, reserved_code, - code_allocator.GetMemory(), + codegen->GetCode(), reserved_data, roots, ArrayRef<const uint8_t>(stack_map), debug_info, /* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(), compilation_kind, - codegen->GetGraph()->HasShouldDeoptimizeFlag(), codegen->GetGraph()->GetCHASingleImplementationList())) { + CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(stack_map.data()), + codegen->GetGraph()->HasShouldDeoptimizeFlag()); code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); return false; } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); + jit_logger->WriteLog(code, codegen->GetAssembler()->CodeSize(), method); } if (kArenaAllocatorCountAllocations) { diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index a1c05e9cad..d2b993280d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -81,8 +81,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void SpillScratch(int reg ATTRIBUTE_UNUSED) override {} - void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {} + void SpillScratch([[maybe_unused]] int reg) override {} + void RestoreScratch([[maybe_unused]] int reg) override {} std::string GetMessage() const { return message_.str(); @@ -126,7 +126,7 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { return scratch; } - void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {} + void FreeScratchLocation([[maybe_unused]] Location loc) override {} void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index d3da3d3ce1..56341f106f 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -62,7 +62,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void VisitReturn(HReturn* ret) override { - HConstant* value = ret->InputAt(0)->AsConstant(); + HConstant* value = ret->InputAt(0)->AsConstantOrNull(); if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) { ReplaceInput(ret, value, 0, true); } @@ -95,7 +95,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void BinaryFP(HBinaryOperation* bin) { - HConstant* rhs = bin->InputAt(1)->AsConstant(); + HConstant* rhs = bin->InputAt(1)->AsConstantOrNull(); if (rhs != nullptr && DataType::IsFloatingPointType(rhs->GetType())) { ReplaceInput(bin, rhs, 1, false); } @@ -193,7 +193,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke) { - HInvokeStaticOrDirect* invoke_static_or_direct = 
invoke->AsInvokeStaticOrDirect(); + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirectOrNull(); // If this is an invoke-static/-direct with PC-relative addressing (within boot image // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base. @@ -207,7 +207,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { base_added = true; } - HInvokeInterface* invoke_interface = invoke->AsInvokeInterface(); + HInvokeInterface* invoke_interface = invoke->AsInvokeInterfaceOrNull(); if (invoke_interface != nullptr && IsPcRelativeMethodLoadKind(invoke_interface->GetHiddenArgumentLoadKind())) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke); @@ -219,7 +219,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { // Ensure that we can load FP arguments from the constant area. HInputsRef inputs = invoke->GetInputs(); for (size_t i = 0; i < inputs.size(); i++) { - HConstant* input = inputs[i]->AsConstant(); + HConstant* input = inputs[i]->AsConstantOrNull(); if (input != nullptr && DataType::IsFloatingPointType(input->GetType())) { ReplaceInput(invoke, input, i, true); } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 91bae5f49b..3a5cceed9a 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -254,7 +254,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { HInstruction* input_two = compare->InputAt(1); HLoadClass* load_class = input_one->IsLoadClass() ? input_one->AsLoadClass() - : input_two->AsLoadClass(); + : input_two->AsLoadClassOrNull(); if (load_class == nullptr) { return; } @@ -335,7 +335,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) { } void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* block) { - HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull(); if (ifInstruction == nullptr) { return; } @@ -453,7 +453,7 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, // If that's the case insert an HBoundType instruction to bound the type of `x` // to `ClassX` in the scope of the dominated blocks. void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* block) { - HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull(); if (ifInstruction == nullptr) { return; } @@ -539,9 +539,14 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* DCHECK_EQ(instr->GetType(), DataType::Type::kReference); ScopedObjectAccess soa(Thread::Current()); - ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); - ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType( - type_idx, dex_cache, dex_cache->GetClassLoader()); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache = + hs.NewHandle(FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_)); + Handle<mirror::ClassLoader> loader = hs.NewHandle(dex_cache->GetClassLoader()); + ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->ResolveType( + type_idx, dex_cache, loader); + DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Clean up the exception left by type resolution if any. 
SetClassAsTypeInfo(instr, klass, is_exact); } @@ -704,7 +709,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); + HBoundType* bound_type = check_cast->GetNext()->AsBoundTypeOrNull(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be // an RTP pass after SsaBuilder and we do not need to do anything. diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 2b012fcd67..ffd94e56b5 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -468,7 +468,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) { LoopOptions lo(GetParam()); std::default_random_engine g( lo.initial_null_state_ != InitialNullState::kTrueRandom ? 42 : std::rand()); - std::uniform_int_distribution<bool> uid(false, true); + std::uniform_int_distribution<int> uid(0, 1); RunVisitListTest([&](std::vector<HInstruction*>& lst, HInstruction* null_input) { auto pred_null = false; auto next_null = [&]() { @@ -482,7 +482,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) { return pred_null; case InitialNullState::kRandomSetSeed: case InitialNullState::kTrueRandom: - return uid(g); + return uid(g) > 0; } }; HPhi* nulled_phi = lo.null_insertion_ >= 0 ? lst[lo.null_insertion_]->AsPhi() : nullptr; diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 53e11f2c3d..a4b1698b8d 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -531,9 +531,9 @@ void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input, HInstruction* previous = user->GetPrevious(); HParallelMove* move = nullptr; - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < user->GetLifetimePosition()) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() < user->GetLifetimePosition()) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(user->GetLifetimePosition()); user->GetBlock()->InsertInstructionBefore(move, user); @@ -593,7 +593,7 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, } else if (IsInstructionEnd(position)) { // Move must happen after the instruction. DCHECK(!at->IsControlFlow()); - move = at->GetNext()->AsParallelMove(); + move = at->GetNext()->AsParallelMoveOrNull(); // This is a parallel move for connecting siblings in a same block. We need to // differentiate it with moves for connecting blocks, and input moves. if (move == nullptr || move->GetLifetimePosition() > position) { @@ -604,15 +604,15 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, } else { // Move must happen before the instruction. 
HInstruction* previous = at->GetPrevious(); - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() != position) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() != position) { // If the previous is a parallel move, then its position must be lower // than the given `position`: it was added just after the non-parallel // move instruction that precedes `instruction`. - DCHECK(previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < position); + DCHECK(previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() < position); move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); at->GetBlock()->InsertInstructionBefore(move, at); @@ -643,8 +643,9 @@ void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block, // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and output moves. size_t position = last->GetLifetimePosition(); - if (previous == nullptr || !previous->IsParallelMove() - || previous->AsParallelMove()->GetLifetimePosition() != position) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->AsParallelMove()->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); block->InsertInstructionBefore(move, last); @@ -662,7 +663,7 @@ void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block, if (source.Equals(destination)) return; HInstruction* first = block->GetFirstInstruction(); - HParallelMove* move = first->AsParallelMove(); + HParallelMove* move = first->AsParallelMoveOrNull(); size_t position = block->GetLifetimeStart(); // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and input moves. @@ -686,7 +687,7 @@ void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction, } size_t position = instruction->GetLifetimePosition() + 1; - HParallelMove* move = instruction->GetNext()->AsParallelMove(); + HParallelMove* move = instruction->GetNext()->AsParallelMoveOrNull(); // This is a parallel move for moving the output of an instruction. We need // to differentiate with input moves, moves for connecting siblings in a // and moves for connecting blocks. 
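The call-site changes above (pc_relative_fixups_x86.cc, reference_type_propagation.cc, register_allocation_resolver.cc) follow a single pattern: `As<Type>()` is replaced by `As<Type>OrNull()` wherever the result is immediately compared against nullptr. A minimal standalone sketch of the two accessor styles, assuming the `OrNull` variant keeps the old null-returning behavior while the plain form expects the exact kind; the types and free functions below are illustrative stand-ins, not the real HInstruction hierarchy in compiler/optimizing/nodes.h:

#include <cassert>
#include <initializer_list>
#include <iostream>

// Stand-in types for illustration only.
struct Instruction {
  virtual ~Instruction() = default;
  virtual bool IsParallelMove() const { return false; }
};

struct ParallelMove : Instruction {
  bool IsParallelMove() const override { return true; }
};

// Plain accessor: the caller promises the kind is right (checked in debug builds).
ParallelMove* AsParallelMove(Instruction* instruction) {
  assert(instruction->IsParallelMove());
  return static_cast<ParallelMove*>(instruction);
}

// "OrNull" accessor: legal on any instruction; the caller handles nullptr.
ParallelMove* AsParallelMoveOrNull(Instruction* instruction) {
  return instruction->IsParallelMove() ? static_cast<ParallelMove*>(instruction) : nullptr;
}

int main() {
  ParallelMove move;
  Instruction other;
  // The rewritten call sites use the second form: fetch-or-null, then branch.
  for (Instruction* instruction : {static_cast<Instruction*>(&move), &other}) {
    if (ParallelMove* parallel_move = AsParallelMoveOrNull(instruction)) {
      std::cout << "parallel move at " << parallel_move << "\n";
    } else {
      std::cout << "not a parallel move\n";
    }
  }
  return 0;
}
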
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index e4c2d74908..f8b057d4a8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -23,7 +23,6 @@ #include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" #include "code_generator.h" -#include "register_allocator_graph_color.h" #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" @@ -45,8 +44,8 @@ std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocato return std::unique_ptr<RegisterAllocator>( new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis)); case kRegisterAllocatorGraphColor: - return std::unique_ptr<RegisterAllocator>( - new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis)); + LOG(FATAL) << "Graph coloring register allocator has been removed."; + UNREACHABLE(); default: LOG(FATAL) << "Invalid register allocation strategy: " << strategy; UNREACHABLE(); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc deleted file mode 100644 index a7c891d4e7..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ /dev/null @@ -1,2086 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "register_allocator_graph_color.h" - -#include "code_generator.h" -#include "linear_order.h" -#include "register_allocation_resolver.h" -#include "ssa_liveness_analysis.h" -#include "thread-current-inl.h" - -namespace art HIDDEN { - -// Highest number of registers that we support for any platform. This can be used for std::bitset, -// for example, which needs to know its size at compile time. -static constexpr size_t kMaxNumRegs = 32; - -// The maximum number of graph coloring attempts before triggering a DCHECK. -// This is meant to catch changes to the graph coloring algorithm that undermine its forward -// progress guarantees. Forward progress for the algorithm means splitting live intervals on -// every graph coloring attempt so that eventually the interference graph will be sparse enough -// to color. The main threat to forward progress is trying to split short intervals which cannot be -// split further; this could cause infinite looping because the interference graph would never -// change. This is avoided by prioritizing short intervals before long ones, so that long -// intervals are split when coloring fails. -static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; - -// We always want to avoid spilling inside loops. -static constexpr size_t kLoopSpillWeightMultiplier = 10; - -// If we avoid moves in single jump blocks, we can avoid jumps to jumps. -static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; - -// We avoid moves in blocks that dominate the exit block, since these blocks will -// be executed on every path through the method. 
-static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; - -enum class CoalesceKind { - kAdjacentSibling, // Prevents moves at interval split points. - kFixedOutputSibling, // Prevents moves from a fixed output location. - kFixedInput, // Prevents moves into a fixed input location. - kNonlinearControlFlow, // Prevents moves between blocks. - kPhi, // Prevents phi resolution moves. - kFirstInput, // Prevents a single input move. - kAnyInput, // May lead to better instruction selection / smaller encodings. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { - return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); -} - -static size_t LoopDepthAt(HBasicBlock* block) { - HLoopInformation* loop_info = block->GetLoopInformation(); - size_t depth = 0; - while (loop_info != nullptr) { - ++depth; - loop_info = loop_info->GetPreHeader()->GetLoopInformation(); - } - return depth; -} - -// Return the runtime cost of inserting a move instruction at the specified location. -static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { - HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); - DCHECK(block != nullptr); - size_t cost = 1; - if (block->IsSingleJump()) { - cost *= kSingleJumpBlockWeightMultiplier; - } - if (block->Dominates(block->GetGraph()->GetExitBlock())) { - cost *= kDominatesExitBlockWeightMultiplier; - } - for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { - cost *= kLoopSpillWeightMultiplier; - } - return cost; -} - -// In general, we estimate coalesce priority by whether it will definitely avoid a move, -// and by how likely it is to create an interference graph that's harder to color. -static size_t ComputeCoalescePriority(CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) { - if (kind == CoalesceKind::kAnyInput) { - // This type of coalescing can affect instruction selection, but not moves, so we - // give it the lowest priority. - return 0; - } else { - return CostForMoveAt(position, liveness); - } -} - -enum class CoalesceStage { - kWorklist, // Currently in the iterative coalescing worklist. - kActive, // Not in a worklist, but could be considered again during iterative coalescing. - kInactive, // No longer considered until last-chance coalescing. - kDefunct, // Either the two nodes interfere, or have already been coalesced. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { - return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); -} - -// Represents a coalesce opportunity between two nodes. -struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { - CoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) - : node_a(a), - node_b(b), - stage(CoalesceStage::kWorklist), - priority(ComputeCoalescePriority(kind, position, liveness)) {} - - // Compare two coalesce opportunities based on their priority. - // Return true if lhs has a lower priority than that of rhs. - static bool CmpPriority(const CoalesceOpportunity* lhs, - const CoalesceOpportunity* rhs) { - return lhs->priority < rhs->priority; - } - - InterferenceNode* const node_a; - InterferenceNode* const node_b; - - // The current stage of this coalesce opportunity, indicating whether it is in a worklist, - // and whether it should still be considered. 
- CoalesceStage stage; - - // The priority of this coalesce opportunity, based on heuristics. - const size_t priority; -}; - -enum class NodeStage { - kInitial, // Uninitialized. - kPrecolored, // Marks fixed nodes. - kSafepoint, // Marks safepoint nodes. - kPrunable, // Marks uncolored nodes in the interference graph. - kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. - kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. - kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. - kPruned // Marks nodes already pruned from the interference graph. -}; - -std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { - return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); -} - -// Returns the estimated cost of spilling a particular live interval. -static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { - if (interval->HasRegister()) { - // Intervals with a fixed register cannot be spilled. - return std::numeric_limits<float>::min(); - } - - size_t length = interval->GetLength(); - if (length == 1) { - // Tiny intervals should have maximum priority, since they cannot be split any further. - return std::numeric_limits<float>::max(); - } - - size_t use_weight = 0; - if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { - // Cost for spilling at a register definition point. - use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); - } - - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - if (use.GetUser() != nullptr && use.RequiresRegister()) { - // Cost for spilling at a register use point. - use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness); - } - } - - // We divide by the length of the interval because we want to prioritize - // short intervals; we do not benefit much if we split them further. - return static_cast<float>(use_weight) / static_cast<float>(length); -} - -// Interference nodes make up the interference graph, which is the primary data structure in -// graph coloring register allocation. Each node represents a single live interval, and contains -// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory, -// pre-colored nodes never contain outgoing edges (only incoming ones). -// -// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed, -// but outgoing edges remain in order to later color the node based on the colors of its neighbors. -// -// Note that a pair interval is represented by a single node in the interference graph, which -// essentially requires two colors. One consequence of this is that the degree of a node is not -// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum -// number of colors with which a node could interfere. We model this by giving edges different -// weights (1 or 2) to control how much it increases the degree of adjacent nodes. -// For example, the edge between two single nodes will have weight 1. 
On the other hand, -// the edge between a single node and a pair node will have weight 2. This is because the pair -// node could block up to two colors for the single node, and because the single node could -// block an entire two-register aligned slot for the pair node. -// The degree is defined this way because we use it to decide whether a node is guaranteed a color, -// and thus whether it is safe to prune it from the interference graph early on. -class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { - public: - InterferenceNode(LiveInterval* interval, - const SsaLivenessAnalysis& liveness) - : stage(NodeStage::kInitial), - interval_(interval), - adjacent_nodes_(nullptr), - coalesce_opportunities_(nullptr), - out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), - alias_(this), - spill_weight_(ComputeSpillWeight(interval, liveness)), - requires_color_(interval->RequiresRegister()), - needs_spill_slot_(false) { - DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; - } - - void AddInterference(InterferenceNode* other, - bool guaranteed_not_interfering_yet, - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) { - DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; - DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; - DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; - DCHECK_NE(stage, NodeStage::kPruned); - DCHECK_NE(other->stage, NodeStage::kPruned); - if (adjacent_nodes_ == nullptr) { - ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - adjacent_nodes_ = &storage->back(); - } - if (guaranteed_not_interfering_yet) { - DCHECK(!ContainsElement(GetAdjacentNodes(), other)); - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } else { - if (!ContainsElement(GetAdjacentNodes(), other)) { - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } - } - } - - void RemoveInterference(InterferenceNode* other) { - DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; - DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; - if (adjacent_nodes_ != nullptr) { - auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other); - if (it != adjacent_nodes_->end()) { - adjacent_nodes_->erase(it); - out_degree_ -= EdgeWeightWith(other); - } - } - } - - bool ContainsInterference(InterferenceNode* other) const { - DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; - DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; - return ContainsElement(GetAdjacentNodes(), other); - } - - LiveInterval* GetInterval() const { - return interval_; - } - - ArrayRef<InterferenceNode*> GetAdjacentNodes() const { - return adjacent_nodes_ != nullptr - ? ArrayRef<InterferenceNode*>(*adjacent_nodes_) - : ArrayRef<InterferenceNode*>(); - } - - size_t GetOutDegree() const { - // Pre-colored nodes have infinite degree. 
- DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max()); - return out_degree_; - } - - void AddCoalesceOpportunity(CoalesceOpportunity* opportunity, - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) { - if (coalesce_opportunities_ == nullptr) { - ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - coalesce_opportunities_ = &storage->back(); - } - coalesce_opportunities_->push_back(opportunity); - } - - void ClearCoalesceOpportunities() { - coalesce_opportunities_ = nullptr; - } - - bool IsMoveRelated() const { - for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kWorklist || - opportunity->stage == CoalesceStage::kActive) { - return true; - } - } - return false; - } - - // Return whether this node already has a color. - // Used to find fixed nodes in the interference graph before coloring. - bool IsPrecolored() const { - return interval_->HasRegister(); - } - - bool IsPair() const { - return interval_->HasHighInterval(); - } - - void SetAlias(InterferenceNode* rep) { - DCHECK_NE(rep->stage, NodeStage::kPruned); - DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; - alias_ = rep; - } - - InterferenceNode* GetAlias() { - if (alias_ != this) { - // Recurse in order to flatten tree of alias pointers. - alias_ = alias_->GetAlias(); - } - return alias_; - } - - ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const { - return coalesce_opportunities_ != nullptr - ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_) - : ArrayRef<CoalesceOpportunity*>(); - } - - float GetSpillWeight() const { - return spill_weight_; - } - - bool RequiresColor() const { - return requires_color_; - } - - // We give extra weight to edges adjacent to pair nodes. See the general comment on the - // interference graph above. - size_t EdgeWeightWith(const InterferenceNode* other) const { - return (IsPair() || other->IsPair()) ? 2 : 1; - } - - bool NeedsSpillSlot() const { - return needs_spill_slot_; - } - - void SetNeedsSpillSlot() { - needs_spill_slot_ = true; - } - - // The current stage of this node, indicating which worklist it belongs to. - NodeStage stage; - - private: - // The live interval that this node represents. - LiveInterval* const interval_; - - // All nodes interfering with this one. - // We use an unsorted vector as a set, since a tree or hash set is too heavy for the - // set sizes that we encounter. Using a vector leads to much better performance. - ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration. - - // Interference nodes that this node should be coalesced with to reduce moves. - ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration. - - // The maximum number of colors with which this node could interfere. This could be more than - // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. - // We use "out" degree because incoming edges come from nodes already pruned from the graph, - // and do not affect the coloring of this node. - // Pre-colored nodes are treated as having infinite degree. - size_t out_degree_; - - // The node representing this node in the interference graph. - // Initially set to `this`, and only changed if this node is coalesced into another. - InterferenceNode* alias_; - - // The cost of splitting and spilling this interval to the stack. 
- // Nodes with a higher spill weight should be prioritized when assigning registers. - // This is essentially based on use density and location; short intervals with many uses inside - // deeply nested loops have a high spill weight. - const float spill_weight_; - - const bool requires_color_; - - bool needs_spill_slot_; - - DISALLOW_COPY_AND_ASSIGN(InterferenceNode); -}; - -// The order in which we color nodes is important. To guarantee forward progress, -// we prioritize intervals that require registers, and after that we prioritize -// short intervals. That way, if we fail to color a node, it either won't require a -// register, or it will be a long interval that can be split in order to make the -// interference graph sparser. -// To improve code quality, we prioritize intervals used frequently in deeply nested loops. -// (This metric is secondary to the forward progress requirements above.) -// TODO: May also want to consider: -// - Constants (since they can be rematerialized) -// - Allocated spill slots -static bool HasGreaterNodePriority(const InterferenceNode* lhs, - const InterferenceNode* rhs) { - // (1) Prioritize the node that requires a color. - if (lhs->RequiresColor() != rhs->RequiresColor()) { - return lhs->RequiresColor(); - } - - // (2) Prioritize the interval that has a higher spill weight. - return lhs->GetSpillWeight() > rhs->GetSpillWeight(); -} - -// A ColoringIteration holds the many data structures needed for a single graph coloring attempt, -// and provides methods for each phase of the attempt. -class ColoringIteration { - public: - ColoringIteration(RegisterAllocatorGraphColor* register_allocator, - ScopedArenaAllocator* allocator, - bool processing_core_regs, - size_t num_regs) - : register_allocator_(register_allocator), - allocator_(allocator), - processing_core_regs_(processing_core_regs), - num_regs_(num_regs), - interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), - prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_worklist_(CoalesceOpportunity::CmpPriority, - allocator->Adapter(kArenaAllocRegisterAllocator)), - adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {} - - // Use the intervals collected from instructions to construct an - // interference graph mapping intervals to adjacency lists. - // Also, collect synthesized safepoint nodes, used to keep - // track of live intervals across safepoints. - // TODO: Should build safepoints elsewhere. - void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes); - - // Add coalesce opportunities to interference nodes. - void FindCoalesceOpportunities(); - - // Prune nodes from the interference graph to be colored later. Build - // a stack (pruned_nodes) containing these intervals in an order determined - // by various heuristics. - void PruneInterferenceGraph(); - - // Process pruned_intervals_ to color the interference graph, spilling when - // necessary. Returns true if successful. 
Else, some intervals have been - // split, and the interference graph should be rebuilt for another attempt. - bool ColorInterferenceGraph(); - - // Return prunable nodes. - // The register allocator will need to access prunable nodes after coloring - // in order to tell the code generator which registers have been assigned. - ArrayRef<InterferenceNode* const> GetPrunableNodes() const { - return ArrayRef<InterferenceNode* const>(prunable_nodes_); - } - - private: - // Create a coalesce opportunity between two nodes. - void CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position); - - // Add an edge in the interference graph, if valid. - // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion - // when possible. - void AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions = true); - - // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors) - // may be pruned from the interference graph. - void FreezeMoves(InterferenceNode* node); - - // Prune a node from the interference graph, updating worklists if necessary. - void PruneNode(InterferenceNode* node); - - // Add coalesce opportunities associated with this node to the coalesce worklist. - void EnableCoalesceOpportunities(InterferenceNode* node); - - // If needed, from `node` from the freeze worklist to the simplify worklist. - void CheckTransitionFromFreezeWorklist(InterferenceNode* node); - - // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively. - bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - // Return true if `from` and `into` are uncolored, and can be coalesced conservatively. - bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - void Coalesce(CoalesceOpportunity* opportunity); - - // Merge `from` into `into` in the interference graph. - void Combine(InterferenceNode* from, InterferenceNode* into); - - // A reference to the register allocator instance, - // needed to split intervals and assign spill slots. - RegisterAllocatorGraphColor* register_allocator_; - - // A scoped arena allocator used for a single graph coloring attempt. - ScopedArenaAllocator* allocator_; - - const bool processing_core_regs_; - - const size_t num_regs_; - - // A map from live intervals to interference nodes. - ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; - - // Uncolored nodes that should be pruned from the interference graph. - ScopedArenaVector<InterferenceNode*> prunable_nodes_; - - // A stack of nodes pruned from the interference graph, waiting to be pruned. - ScopedArenaStdStack<InterferenceNode*> pruned_nodes_; - - // A queue containing low degree, non-move-related nodes that can pruned immediately. - ScopedArenaDeque<InterferenceNode*> simplify_worklist_; - - // A queue containing low degree, move-related nodes. - ScopedArenaDeque<InterferenceNode*> freeze_worklist_; - - // A queue containing high degree nodes. - // If we have to prune from the spill worklist, we cannot guarantee - // the pruned node a color, so we order the worklist by priority. - ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; - - // A queue containing coalesce opportunities. 
- // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those - // inside of loops) are more important than others. - ScopedArenaPriorityQueue<CoalesceOpportunity*, - decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; - - // Storage for links to adjacent nodes for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_; - - // Storage for links to coalesce opportunities for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_; - - DISALLOW_COPY_AND_ASSIGN(ColoringIteration); -}; - -static bool IsCoreInterval(LiveInterval* interval) { - return !DataType::IsFloatingPointType(interval->GetType()); -} - -static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { - return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize; -} - -RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness, - bool iterative_move_coalescing) - : RegisterAllocator(allocator, codegen, liveness), - iterative_move_coalescing_(iterative_move_coalescing), - core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - num_int_spill_slots_(0), - num_double_spill_slots_(0), - num_float_spill_slots_(0), - num_long_spill_slots_(0), - catch_phi_spill_slot_counter_(0), - reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)), - reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) { - // Before we ask for blocked registers, set them up in the code generator. - codegen->SetupBlockedRegisters(); - - // Initialize physical core register live intervals and blocked registers. - // This includes globally blocked registers, such as the stack pointer. - physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32); - physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_core_nodes_[i]->stage = NodeStage::kPrecolored; - core_intervals_.push_back(interval); - if (codegen_->IsBlockedCoreRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } - // Initialize physical floating point register live intervals and blocked registers. 
- physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - LiveInterval* interval = - LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32); - physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; - fp_intervals_.push_back(interval); - if (codegen_->IsBlockedFloatingPointRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } -} - -RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {} - -void RegisterAllocatorGraphColor::AllocateRegisters() { - // (1) Collect and prepare live intervals. - ProcessInstructions(); - - for (bool processing_core_regs : {true, false}) { - ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs - ? core_intervals_ - : fp_intervals_; - size_t num_registers = processing_core_regs - ? codegen_->GetNumberOfCoreRegisters() - : codegen_->GetNumberOfFloatingPointRegisters(); - - size_t attempt = 0; - while (true) { - ++attempt; - DCHECK(attempt <= kMaxGraphColoringAttemptsDebug) - << "Exceeded debug max graph coloring register allocation attempts. " - << "This could indicate that the register allocator is not making forward progress, " - << "which could be caused by prioritizing the wrong live intervals. (Short intervals " - << "should be prioritized over long ones, because they cannot be split further.)"; - - // Many data structures are cleared between graph coloring attempts, so we reduce - // total memory usage by using a new scoped arena allocator for each attempt. - ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack()); - ColoringIteration iteration(this, - &coloring_attempt_allocator, - processing_core_regs, - num_registers); - - // (2) Build the interference graph. - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - iteration.BuildInterferenceGraph(intervals, physical_nodes); - - // (3) Add coalesce opportunities. - // If we have tried coloring the graph a suspiciously high number of times, give - // up on move coalescing, just in case the coalescing heuristics are not conservative. - // (This situation will be caught if DCHECKs are turned on.) - if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { - iteration.FindCoalesceOpportunities(); - } - - // (4) Prune all uncolored nodes from interference graph. - iteration.PruneInterferenceGraph(); - - // (5) Color pruned nodes based on interferences. - bool successful = iteration.ColorInterferenceGraph(); - - // We manually clear coalesce opportunities for physical nodes, - // since they persist across coloring attempts. - for (InterferenceNode* node : physical_core_nodes_) { - node->ClearCoalesceOpportunities(); - } - for (InterferenceNode* node : physical_fp_nodes_) { - node->ClearCoalesceOpportunities(); - } - - if (successful) { - // Assign spill slots. - AllocateSpillSlots(iteration.GetPrunableNodes()); - - // Tell the code generator which registers were allocated. - // We only look at prunable_nodes because we already told the code generator about - // fixed intervals while processing instructions. We also ignore the fixed intervals - // placed at the top of catch blocks. 
- for (InterferenceNode* node : iteration.GetPrunableNodes()) { - LiveInterval* interval = node->GetInterval(); - if (interval->HasRegister()) { - Location low_reg = processing_core_regs - ? Location::RegisterLocation(interval->GetRegister()) - : Location::FpuRegisterLocation(interval->GetRegister()); - codegen_->AddAllocatedRegister(low_reg); - if (interval->HasHighInterval()) { - LiveInterval* high = interval->GetHighInterval(); - DCHECK(high->HasRegister()); - Location high_reg = processing_core_regs - ? Location::RegisterLocation(high->GetRegister()) - : Location::FpuRegisterLocation(high->GetRegister()); - codegen_->AddAllocatedRegister(high_reg); - } - } else { - DCHECK_IMPLIES(interval->HasHighInterval(), - !interval->GetHighInterval()->HasRegister()); - } - } - - break; - } - } // while unsuccessful - } // for processing_core_instructions - - // (6) Resolve locations and deconstruct SSA form. - RegisterAllocationResolver(codegen_, liveness_) - .Resolve(ArrayRef<HInstruction* const>(safepoints_), - reserved_art_method_slots_ + reserved_out_slots_, - num_int_spill_slots_, - num_long_spill_slots_, - num_float_spill_slots_, - num_double_spill_slots_, - catch_phi_spill_slot_counter_, - ArrayRef<LiveInterval* const>(temp_intervals_)); - - if (kIsDebugBuild) { - Validate(/*log_fatal_on_failure*/ true); - } -} - -bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { - for (bool processing_core_regs : {true, false}) { - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<LiveInterval*> intervals( - allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); - for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) { - intervals.push_back(instruction->GetLiveInterval()); - } - } - - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - for (InterferenceNode* fixed : physical_nodes) { - LiveInterval* interval = fixed->GetInterval(); - if (interval->GetFirstRange() != nullptr) { - // Ideally we would check fixed ranges as well, but currently there are times when - // two fixed intervals for the same register will overlap. For example, a fixed input - // and a fixed output may sometimes share the same register, in which there will be two - // fixed intervals for the same place. - } - } - - for (LiveInterval* temp : temp_intervals_) { - if (IsCoreInterval(temp) == processing_core_regs) { - intervals.push_back(temp); - } - } - - size_t spill_slots = num_int_spill_slots_ - + num_long_spill_slots_ - + num_float_spill_slots_ - + num_double_spill_slots_ - + catch_phi_spill_slot_counter_; - bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), - spill_slots, - reserved_art_method_slots_ + reserved_out_slots_, - *codegen_, - processing_core_regs, - log_fatal_on_failure); - if (!ok) { - return false; - } - } // for processing_core_regs - - return true; -} - -void RegisterAllocatorGraphColor::ProcessInstructions() { - for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { - // Note that we currently depend on this ordering, since some helper - // code is designed for linear scan register allocation. 
- for (HBackwardInstructionIterator instr_it(block->GetInstructions()); - !instr_it.Done(); - instr_it.Advance()) { - ProcessInstruction(instr_it.Current()); - } - - for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - ProcessInstruction(phi_it.Current()); - } - - if (block->IsCatchBlock() - || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { - // By blocking all registers at the top of each catch block or irreducible loop, we force - // intervals belonging to the live-in set of the catch/header block to be spilled. - // TODO(ngeoffray): Phis in this block could be allocated in register. - size_t position = block->GetLifetimeStart(); - BlockRegisters(position, position + 1); - } - } -} - -bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) { - // TODO: We do this here because we do not want the suspend check to artificially - // create live registers. We should find another place, but this is currently the - // simplest. - DCHECK_EQ(locations->GetTempCount(), 0u); - instruction->GetBlock()->RemoveInstruction(instruction); - return true; - } - return false; -} - -void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (locations == nullptr) { - return; - } - if (TryRemoveSuspendCheckEntry(instruction)) { - return; - } - - CheckForTempLiveIntervals(instruction); - CheckForSafepoint(instruction); - if (locations->WillCall()) { - // If a call will happen, create fixed intervals for caller-save registers. - // TODO: Note that it may be beneficial to later split intervals at this point, - // so that we allow last-minute moves from a caller-save register - // to a callee-save register. - BlockRegisters(instruction->GetLifetimePosition(), - instruction->GetLifetimePosition() + 1, - /*caller_save_only*/ true); - } - CheckForFixedInputs(instruction); - - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval == nullptr) { - // Instructions lacking a valid output location do not have a live interval. - DCHECK(!locations->Out().IsValid()); - return; - } - - // Low intervals act as representatives for their corresponding high interval. - DCHECK(!interval->IsHighInterval()); - if (codegen_->NeedsTwoRegisters(interval->GetType())) { - interval->AddHighInterval(); - } - AddSafepointsFor(instruction); - CheckForFixedOutput(instruction); - AllocateSpillSlotForCatchPhi(instruction); - - ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) - ? core_intervals_ - : fp_intervals_; - if (interval->HasSpillSlot() || instruction->IsConstant()) { - // Note that if an interval already has a spill slot, then its value currently resides - // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first - // register use. This is also true for constants, which can be materialized at any point. - size_t first_register_use = interval->FirstRegisterUse(); - if (first_register_use != kNoLifetime) { - LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1); - intervals.push_back(split); - } else { - // We won't allocate a register for this value. 
- } - } else { - intervals.push_back(interval); - } -} - -void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) { - // We simply block physical registers where necessary. - // TODO: Ideally we would coalesce the physical register with the register - // allocated to the input value, but this can be tricky if, e.g., there - // could be multiple physical register uses of the same value at the - // same instruction. Furthermore, there's currently no distinction between - // fixed inputs to a call (which will be clobbered) and other fixed inputs (which - // may not be clobbered). - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetInputCount(); ++i) { - Location input = locations->InAt(i); - if (input.IsRegister() || input.IsFpuRegister()) { - BlockRegister(input, position, position + 1); - codegen_->AddAllocatedRegister(input); - } else if (input.IsPair()) { - BlockRegister(input.ToLow(), position, position + 1); - BlockRegister(input.ToHigh(), position, position + 1); - codegen_->AddAllocatedRegister(input.ToLow()); - codegen_->AddAllocatedRegister(input.ToHigh()); - } - } -} - -void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) { - // If an instruction has a fixed output location, we give the live interval a register and then - // proactively split it just after the definition point to avoid creating too many interferences - // with a fixed node. - LiveInterval* interval = instruction->GetLiveInterval(); - Location out = interval->GetDefinedBy()->GetLocations()->Out(); - size_t position = instruction->GetLifetimePosition(); - DCHECK_GE(interval->GetEnd() - position, 2u); - - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - out = instruction->GetLocations()->InAt(0); - } - - if (out.IsRegister() || out.IsFpuRegister()) { - interval->SetRegister(out.reg()); - codegen_->AddAllocatedRegister(out); - Split(interval, position + 1); - } else if (out.IsPair()) { - interval->SetRegister(out.low()); - interval->GetHighInterval()->SetRegister(out.high()); - codegen_->AddAllocatedRegister(out.ToLow()); - codegen_->AddAllocatedRegister(out.ToHigh()); - Split(interval, position + 1); - } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) { - interval->SetSpillSlot(out.GetStackIndex()); - } else { - DCHECK(out.IsUnallocated() || out.IsConstant()); - } -} - -void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) { - LiveInterval* interval = instruction->GetLiveInterval(); - for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); - - // Test that safepoints_ are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.size() || - safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); - - if (safepoint_position == interval->GetStart()) { - // The safepoint is for this instruction, so the location of the instruction - // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.size()); - DCHECK_EQ(safepoint, instruction); - continue; - } else if (interval->IsDeadAt(safepoint_position)) { - break; - } else if (!interval->Covers(safepoint_position)) { - // Hole in the interval. 
- continue; - } - interval->AddSafepoint(safepoint); - } -} - -void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetTempCount(); ++i) { - Location temp = locations->GetTemp(i); - if (temp.IsRegister() || temp.IsFpuRegister()) { - BlockRegister(temp, position, position + 1); - codegen_->AddAllocatedRegister(temp); - } else { - DCHECK(temp.IsUnallocated()); - switch (temp.GetPolicy()) { - case Location::kRequiresRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); - interval->AddTempUse(instruction, i); - core_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - break; - } - - case Location::kRequiresFpuRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); - interval->AddTempUse(instruction, i); - fp_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { - interval->AddHighInterval(/*is_temp*/ true); - temp_intervals_.push_back(interval->GetHighInterval()); - } - break; - } - - default: - LOG(FATAL) << "Unexpected policy for temporary location " - << temp.GetPolicy(); - } - } - } -} - -void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - - if (locations->NeedsSafepoint()) { - safepoints_.push_back(instruction); - } -} - -LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) { - if (interval->GetStart() < position && position < interval->GetEnd()) { - return Split(interval, position); - } else { - return interval; - } -} - -void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { - DCHECK(!interval->IsHighInterval()); - - // Split just after a register definition. - if (interval->IsParent() && interval->DefinitionRequiresRegister()) { - interval = TrySplit(interval, interval->GetStart() + 1); - } - - // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart(), interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart(), - interval->GetEnd() + 1u); - // Split around register uses. - for (const UsePosition& use : matching_use_range) { - if (use.RequiresRegister()) { - size_t position = use.GetPosition(); - interval = TrySplit(interval, position - 1); - if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { - // If we are at the very end of a basic block, we cannot split right - // at the use. Split just after instead. 
- interval = TrySplit(interval, position + 1); - } else { - interval = TrySplit(interval, position); - } - } - } -} - -void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) { - if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { - HPhi* phi = instruction->AsPhi(); - LiveInterval* interval = phi->GetLiveInterval(); - - HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) - << "Phis expected to be sorted by vreg number, " - << "so that equivalent phis are adjacent."; - - if (phi->IsVRegEquivalentOf(previous_phi)) { - // Assign the same spill slot. - DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); - interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); - } else { - interval->SetSpillSlot(catch_phi_spill_slot_counter_); - catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); - } - } -} - -void RegisterAllocatorGraphColor::BlockRegister(Location location, - size_t start, - size_t end) { - DCHECK(location.IsRegister() || location.IsFpuRegister()); - int reg = location.reg(); - LiveInterval* interval = location.IsRegister() - ? physical_core_nodes_[reg]->GetInterval() - : physical_fp_nodes_[reg]->GetInterval(); - DCHECK(interval->GetRegister() == reg); - bool blocked_by_codegen = location.IsRegister() - ? codegen_->IsBlockedCoreRegister(reg) - : codegen_->IsBlockedFloatingPointRegister(reg); - if (blocked_by_codegen) { - // We've already blocked this register for the entire method. (And adding a - // range inside another range violates the preconditions of AddRange). - } else { - interval->AddRange(start, end); - } -} - -void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) { - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { - BlockRegister(Location::RegisterLocation(i), start, end); - } - } - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { - BlockRegister(Location::FpuRegisterLocation(i), start, end); - } - } -} - -void ColoringIteration::AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions) { - if (from->IsPrecolored()) { - // We save space by ignoring outgoing edges from fixed nodes. - } else if (to->IsPrecolored()) { - // It is important that only a single node represents a given fixed register in the - // interference graph. We retrieve that node here. - const ScopedArenaVector<InterferenceNode*>& physical_nodes = - to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_ - : register_allocator_->physical_core_nodes_; - InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; - from->AddInterference( - physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); - DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; - - // If a node interferes with a fixed pair node, the weight of the edge may - // be inaccurate after using the alias of the pair node, because the alias of the pair node - // is a singular node. 
- // We could make special pair fixed nodes, but that ends up being too conservative because - // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of - // three rather than two. - // Instead, we explicitly add an interference with the high node of the fixed pair node. - // TODO: This is too conservative at time for pair nodes, but the fact that fixed pair intervals - // can be unaligned on x86 complicates things. - if (to->IsPair()) { - InterferenceNode* high_node = - physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; - DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), - high_node->GetInterval()->GetRegister()); - from->AddInterference( - high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - } - } else { - // Standard interference between two uncolored nodes. - from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_); - } - - if (both_directions) { - AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); - } -} - -// Returns true if `in_node` represents an input interval of `out_node`, and the output interval -// is allowed to have the same register as the input interval. -// TODO: Ideally we should just produce correct intervals in liveness analysis. -// We would need to refactor the current live interval layout to do so, which is -// no small task. -static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { - LiveInterval* output_interval = out_node->GetInterval(); - HInstruction* defined_by = output_interval->GetDefinedBy(); - if (defined_by == nullptr) { - // This must not be a definition point. - return false; - } - - LocationSummary* locations = defined_by->GetLocations(); - if (locations->OutputCanOverlapWithInputs()) { - // This instruction does not allow the output to reuse a register from an input. - return false; - } - - LiveInterval* input_interval = in_node->GetInterval(); - LiveInterval* next_sibling = input_interval->GetNextSibling(); - size_t def_position = defined_by->GetLifetimePosition(); - size_t use_position = def_position + 1; - if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { - // The next sibling starts at the use position, so reusing the input register in the output - // would clobber the input before it's moved into the sibling interval location. - return false; - } - - if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { - // The input interval is live after the use position. - return false; - } - - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { - DCHECK(input_interval->SameRegisterKind(*output_interval)); - return true; - } - } - - // The input interval was not an input for this instruction. - return false; -} - -void ColoringIteration::BuildInterferenceGraph( - const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); - // Build the interference graph efficiently by ordering range endpoints - // by position and doing a linear sweep to find interferences. (That is, we - // jump from endpoint to endpoint, maintaining a set of intervals live at each - // point. If two nodes are ever in the live set at the same time, then they - // interfere with each other.) 
- // - // We order by both position and (secondarily) by whether the endpoint - // begins or ends a range; we want to process range endings before range - // beginnings at the same position because they should not conflict. - // - // For simplicity, we create a tuple for each endpoint, and then sort the tuples. - // Tuple contents: (position, is_range_beginning, node). - ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // We reserve plenty of space to avoid excessive copying. - range_endpoints.reserve(4 * prunable_nodes_.size()); - - for (LiveInterval* parent : intervals) { - for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { - LiveRange* range = sibling->GetFirstRange(); - if (range != nullptr) { - InterferenceNode* node = - new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.insert(std::make_pair(sibling, node)); - - if (sibling->HasRegister()) { - // Fixed nodes should alias the canonical node for the corresponding register. - node->stage = NodeStage::kPrecolored; - InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; - node->SetAlias(physical_node); - DCHECK_EQ(node->GetInterval()->GetRegister(), - physical_node->GetInterval()->GetRegister()); - } else { - node->stage = NodeStage::kPrunable; - prunable_nodes_.push_back(node); - } - - while (range != nullptr) { - range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node)); - range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node)); - range = range->GetNext(); - } - } - } - } - - // Sort the endpoints. - // We explicitly ignore the third entry of each tuple (the node pointer) in order - // to maintain determinism. - std::sort(range_endpoints.begin(), range_endpoints.end(), - [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, - const std::tuple<size_t, bool, InterferenceNode*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - // Nodes live at the current position in the linear sweep. - ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the - // live set. When we encounter the end of a range, we remove the corresponding node - // from the live set. Nodes interfere if they are in the live set at the same time. - for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { - bool is_range_beginning; - InterferenceNode* node; - size_t position; - // Extract information from the tuple, including the node this tuple represents. - std::tie(position, is_range_beginning, node) = *it; - - if (is_range_beginning) { - bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); - for (InterferenceNode* conflicting : live) { - DCHECK_NE(node, conflicting); - if (CheckInputOutputCanOverlap(conflicting, node)) { - // We do not add an interference, because the instruction represented by `node` allows - // its output to share a register with an input, represented here by `conflicting`. - } else { - AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); - } - } - DCHECK(std::find(live.begin(), live.end(), node) == live.end()); - live.push_back(node); - } else { - // End of range. 
- auto live_it = std::find(live.begin(), live.end(), node); - DCHECK(live_it != live.end()); - live.erase(live_it); - } - } - DCHECK(live.empty()); -} - -void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position) { - DCHECK_EQ(a->IsPair(), b->IsPair()) - << "Nodes of different memory widths should never be coalesced"; - CoalesceOpportunity* opportunity = - new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); - a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - coalesce_worklist_.push(opportunity); -} - -// When looking for coalesce opportunities, we use the interval_node_map_ to find the node -// corresponding to an interval. Note that not all intervals are in this map, notably the parents -// of constants and stack arguments. (However, these interval should not be involved in coalesce -// opportunities anyway, because they're not going to be in registers.) -void ColoringIteration::FindCoalesceOpportunities() { - DCHECK(coalesce_worklist_.empty()); - - for (InterferenceNode* node : prunable_nodes_) { - LiveInterval* interval = node->GetInterval(); - - // Coalesce siblings. - LiveInterval* next_sibling = interval->GetNextSibling(); - if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.find(next_sibling); - if (it != interval_node_map_.end()) { - InterferenceNode* sibling_node = it->second; - CreateCoalesceOpportunity(node, - sibling_node, - CoalesceKind::kAdjacentSibling, - interval->GetEnd()); - } - } - - // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. - LiveInterval* parent = interval->GetParent(); - if (parent->HasRegister() - && parent->GetNextSibling() == interval - && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(parent); - if (it != interval_node_map_.end()) { - InterferenceNode* parent_node = it->second; - CreateCoalesceOpportunity(node, - parent_node, - CoalesceKind::kFixedOutputSibling, - parent->GetEnd()); - } - } - - // Try to prevent moves across blocks. - // Note that this does not lead to many succeeding coalesce attempts, so could be removed - // if found to add to compile time. - const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; - if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { - // If the start of this interval is at a block boundary, we look at the - // location of the interval in blocks preceding the block this interval - // starts at. This can avoid a move between the two blocks. - HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - size_t position = predecessor->GetLifetimeEnd() - 1; - LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); - if (existing != nullptr) { - auto it = interval_node_map_.find(existing); - if (it != interval_node_map_.end()) { - InterferenceNode* existing_node = it->second; - CreateCoalesceOpportunity(node, - existing_node, - CoalesceKind::kNonlinearControlFlow, - position); - } - } - } - } - - // Coalesce phi inputs with the corresponding output. 
- HInstruction* defined_by = interval->GetDefinedBy(); - if (defined_by != nullptr && defined_by->IsPhi()) { - ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); - HInputsRef inputs = defined_by->GetInputs(); - - for (size_t i = 0, e = inputs.size(); i < e; ++i) { - // We want the sibling at the end of the appropriate predecessor block. - size_t position = predecessors[i]->GetLifetimeEnd() - 1; - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); - } - } - } - - // Coalesce output with first input when policy is kSameAsFirstInput. - if (defined_by != nullptr) { - Location out = defined_by->GetLocations()->Out(); - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - LiveInterval* input_interval - = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); - // TODO: Could we consider lifetime holes here? - if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kFirstInput, - interval->GetStart()); - } - } - } - } - - // An interval that starts an instruction (that is, it is not split), may - // re-use the registers used by the inputs of that instruction, based on the - // location summary. - if (defined_by != nullptr) { - DCHECK(!interval->IsSplit()); - LocationSummary* locations = defined_by->GetLocations(); - if (!locations->OutputCanOverlapWithInputs()) { - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - size_t def_point = defined_by->GetLifetimePosition(); - // TODO: Getting the sibling at the def_point might not be quite what we want - // for fixed inputs, since the use will be *at* the def_point rather than after. - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); - if (input_interval != nullptr && - input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kAnyInput, - interval->GetStart()); - } - } - } - } - } - - // Try to prevent moves into fixed input locations. - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - HInstruction* user = use.GetUser(); - if (user == nullptr) { - // User may be null for certain intervals, such as temp intervals. - continue; - } - LocationSummary* locations = user->GetLocations(); - Location input = locations->InAt(use.GetInputIndex()); - if (input.IsRegister() || input.IsFpuRegister()) { - // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes - // is currently not supported. - InterferenceNode* fixed_node = input.IsRegister() - ? 
register_allocator_->physical_core_nodes_[input.reg()] - : register_allocator_->physical_fp_nodes_[input.reg()]; - CreateCoalesceOpportunity(node, - fixed_node, - CoalesceKind::kFixedInput, - user->GetLifetimePosition()); - } - } - } // for node in prunable_nodes -} - -static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { - return node->GetOutDegree() < num_regs; -} - -static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { - return !IsLowDegreeNode(node, num_regs); -} - -void ColoringIteration::PruneInterferenceGraph() { - DCHECK(pruned_nodes_.empty() - && simplify_worklist_.empty() - && freeze_worklist_.empty() - && spill_worklist_.empty()); - // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, - // and all others as high degree nodes. The distinction is important: low degree nodes are - // guaranteed a color, while high degree nodes are not. - - // Build worklists. Note that the coalesce worklist has already been - // filled by FindCoalesceOpportunities(). - for (InterferenceNode* node : prunable_nodes_) { - DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; - if (IsLowDegreeNode(node, num_regs_)) { - if (node->GetCoalesceOpportunities().empty()) { - // Simplify Worklist. - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } else { - // Freeze Worklist. - node->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(node); - } - } else { - // Spill worklist. - node->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(node); - } - } - - // Prune graph. - // Note that we do not remove a node from its current worklist if it moves to another, so it may - // be in multiple worklists at once; the node's `phase` says which worklist it is really in. - while (true) { - if (!simplify_worklist_.empty()) { - // Prune low-degree nodes. - // TODO: pop_back() should work as well, but it didn't; we get a - // failed check while pruning. We should look into this. - InterferenceNode* node = simplify_worklist_.front(); - simplify_worklist_.pop_front(); - DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; - DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; - DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; - PruneNode(node); - } else if (!coalesce_worklist_.empty()) { - // Coalesce. - CoalesceOpportunity* opportunity = coalesce_worklist_.top(); - coalesce_worklist_.pop(); - if (opportunity->stage == CoalesceStage::kWorklist) { - Coalesce(opportunity); - } - } else if (!freeze_worklist_.empty()) { - // Freeze moves and prune a low-degree move-related node. - InterferenceNode* node = freeze_worklist_.front(); - freeze_worklist_.pop_front(); - if (node->stage == NodeStage::kFreezeWorklist) { - DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; - DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; - FreezeMoves(node); - PruneNode(node); - } - } else if (!spill_worklist_.empty()) { - // We spill the lowest-priority node, because pruning a node earlier - // gives it a higher chance of being spilled. 
- InterferenceNode* node = spill_worklist_.top(); - spill_worklist_.pop(); - if (node->stage == NodeStage::kSpillWorklist) { - DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; - FreezeMoves(node); - PruneNode(node); - } - } else { - // Pruning complete. - break; - } - } - DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); -} - -void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kActive) { - opportunity->stage = CoalesceStage::kWorklist; - coalesce_worklist_.push(opportunity); - } - } -} - -void ColoringIteration::PruneNode(InterferenceNode* node) { - DCHECK_NE(node->stage, NodeStage::kPruned); - DCHECK(!node->IsPrecolored()); - node->stage = NodeStage::kPruned; - pruned_nodes_.push(node); - - for (InterferenceNode* adj : node->GetAdjacentNodes()) { - DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; - - if (adj->IsPrecolored()) { - // No effect on pre-colored nodes; they're never pruned. - } else { - // Remove the interference. - bool was_high_degree = IsHighDegreeNode(adj, num_regs_); - DCHECK(adj->ContainsInterference(node)) - << "Missing reflexive interference from non-fixed node"; - adj->RemoveInterference(node); - - // Handle transitions from high degree to low degree. - if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { - EnableCoalesceOpportunities(adj); - for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { - EnableCoalesceOpportunities(adj_adj); - } - - DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); - if (adj->IsMoveRelated()) { - adj->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(adj); - } else { - adj->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(adj); - } - } - } - } -} - -void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { - if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { - DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } -} - -void ColoringIteration::FreezeMoves(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - // Constrained moves should remain constrained, since they will not be considered - // during last-chance coalescing. - } else { - opportunity->stage = CoalesceStage::kInactive; - } - InterferenceNode* other = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias() - : opportunity->node_a->GetAlias(); - if (other != node && other->stage == NodeStage::kFreezeWorklist) { - DCHECK(IsLowDegreeNode(node, num_regs_)); - CheckTransitionFromFreezeWorklist(other); - } - } -} - -bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (!into->IsPrecolored()) { - // The uncolored heuristic will cover this case. - return false; - } - if (from->IsPair() || into->IsPair()) { - // TODO: Merging from a pair node is currently not supported, since fixed pair nodes - // are currently represented as two single fixed nodes in the graph, and `into` is - // only one of them. (We may lose the implicit connections to the second one in a merge.) - return false; - } - - // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. 
- // Reasons an adjacent node `adj` can be "ok": - // (1) If `adj` is low degree, interference with `into` will not affect its existing - // colorable guarantee. (Notice that coalescing cannot increase its degree.) - // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). - // (3) If there's already an interference with `into`, coalescing will not add interferences. - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { - // Ok. - } else { - return false; - } - } - return true; -} - -bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (into->IsPrecolored()) { - // The pre-colored heuristic will handle this case. - return false; - } - - // Arbitrary cap to improve compile time. Tests show that this has negligible affect - // on generated code. - if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { - return false; - } - - // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors - // of high degree. (Low degree neighbors can be ignored, because they will eventually be - // pruned from the interference graph in the simplify stage.) - size_t high_degree_interferences = 0; - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - if (IsHighDegreeNode(adj, num_regs_)) { - high_degree_interferences += from->EdgeWeightWith(adj); - } - } - for (InterferenceNode* adj : into->GetAdjacentNodes()) { - if (IsHighDegreeNode(adj, num_regs_)) { - if (from->ContainsInterference(adj)) { - // We've already counted this adjacent node. - // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that - // we should not have counted it at all. (This extends the textbook Briggs coalescing test, - // but remains conservative.) - if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { - high_degree_interferences -= from->EdgeWeightWith(adj); - } - } else { - high_degree_interferences += into->EdgeWeightWith(adj); - } - } - } - - return high_degree_interferences < num_regs_; -} - -void ColoringIteration::Combine(InterferenceNode* from, - InterferenceNode* into) { - from->SetAlias(into); - - // Add interferences. - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - bool was_low_degree = IsLowDegreeNode(adj, num_regs_); - AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); - if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { - // This is a (temporary) transition to a high degree node. Its degree will decrease again - // when we prune `from`, but it's best to be consistent about the current worklist. - adj->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(adj); - } - } - - // Add coalesce opportunities. - for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { - if (opportunity->stage != CoalesceStage::kDefunct) { - into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - } - } - EnableCoalesceOpportunities(from); - - // Prune and update worklists. - PruneNode(from); - if (IsLowDegreeNode(into, num_regs_)) { - // Coalesce(...) takes care of checking for a transition to the simplify worklist. - DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); - } else if (into->stage == NodeStage::kFreezeWorklist) { - // This is a transition to a high degree node. 
- into->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(into); - } else { - DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); - } -} - -void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { - InterferenceNode* from = opportunity->node_a->GetAlias(); - InterferenceNode* into = opportunity->node_b->GetAlias(); - DCHECK_NE(from->stage, NodeStage::kPruned); - DCHECK_NE(into->stage, NodeStage::kPruned); - - if (from->IsPrecolored()) { - // If we have one pre-colored node, make sure it's the `into` node. - std::swap(from, into); - } - - if (from == into) { - // These nodes have already been coalesced. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - } else if (from->IsPrecolored() || from->ContainsInterference(into)) { - // These nodes interfere. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - CheckTransitionFromFreezeWorklist(into); - } else if (PrecoloredHeuristic(from, into) - || UncoloredHeuristic(from, into)) { - // We can coalesce these nodes. - opportunity->stage = CoalesceStage::kDefunct; - Combine(from, into); - CheckTransitionFromFreezeWorklist(into); - } else { - // We cannot coalesce, but we may be able to later. - opportunity->stage = CoalesceStage::kActive; - } -} - -// Build a mask with a bit set for each register assigned to some -// interval in `intervals`. -template <typename Container> -static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { - std::bitset<kMaxNumRegs> conflict_mask; - for (InterferenceNode* adjacent : intervals) { - LiveInterval* conflicting = adjacent->GetInterval(); - if (conflicting->HasRegister()) { - conflict_mask.set(conflicting->GetRegister()); - if (conflicting->HasHighInterval()) { - DCHECK(conflicting->GetHighInterval()->HasRegister()); - conflict_mask.set(conflicting->GetHighInterval()->GetRegister()); - } - } else { - DCHECK(!conflicting->HasHighInterval() - || !conflicting->GetHighInterval()->HasRegister()); - } - } - return conflict_mask; -} - -bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { - return processing_core_regs - ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsFloatingPointCalleeSaveRegister(reg); -} - -static bool RegisterIsAligned(size_t reg) { - return reg % 2 == 0; -} - -static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { - // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. - // Note that CTZ is undefined if all bits are 0, so we special-case it. - return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); -} - -bool ColoringIteration::ColorInterferenceGraph() { - DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; - ScopedArenaVector<LiveInterval*> colored_intervals( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - bool successful = true; - - while (!pruned_nodes_.empty()) { - InterferenceNode* node = pruned_nodes_.top(); - pruned_nodes_.pop(); - LiveInterval* interval = node->GetInterval(); - size_t reg = 0; - - InterferenceNode* alias = node->GetAlias(); - if (alias != node) { - // This node was coalesced with another. 
- LiveInterval* alias_interval = alias->GetInterval(); - if (alias_interval->HasRegister()) { - reg = alias_interval->GetRegister(); - DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) - << "This node conflicts with the register it was coalesced with"; - } else { - DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " - << "Move coalescing was not conservative, causing a node to be coalesced " - << "with another node that could not be colored"; - if (interval->RequiresRegister()) { - successful = false; - } - } - } else { - // Search for free register(s). - std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); - if (interval->HasHighInterval()) { - // Note that the graph coloring allocator assumes that pair intervals are aligned here, - // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we - // change the alignment requirements here, we will have to update the algorithm (e.g., - // be more conservative about the weight of edges adjacent to pair nodes.) - while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { - reg += 2; - } - - // Try to use a caller-save register first. - for (size_t i = 0; i < num_regs_ - 1; i += 2) { - bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); - bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); - if (!conflict_mask[i] && !conflict_mask[i + 1]) { - if (low_caller_save && high_caller_save) { - reg = i; - break; - } else if (low_caller_save || high_caller_save) { - reg = i; - // Keep looking to try to get both parts in caller-save registers. - } - } - } - } else { - // Not a pair interval. - reg = FindFirstZeroInConflictMask(conflict_mask); - - // Try to use caller-save registers first. - for (size_t i = 0; i < num_regs_; ++i) { - if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { - reg = i; - break; - } - } - } - - // Last-chance coalescing. - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - continue; - } - LiveInterval* other_interval = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias()->GetInterval() - : opportunity->node_a->GetAlias()->GetInterval(); - if (other_interval->HasRegister()) { - size_t coalesce_register = other_interval->GetRegister(); - if (interval->HasHighInterval()) { - if (!conflict_mask[coalesce_register] && - !conflict_mask[coalesce_register + 1] && - RegisterIsAligned(coalesce_register)) { - reg = coalesce_register; - break; - } - } else if (!conflict_mask[coalesce_register]) { - reg = coalesce_register; - break; - } - } - } - } - - if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { - // Assign register. - DCHECK(!interval->HasRegister()); - interval->SetRegister(reg); - colored_intervals.push_back(interval); - if (interval->HasHighInterval()) { - DCHECK(!interval->GetHighInterval()->HasRegister()); - interval->GetHighInterval()->SetRegister(reg + 1); - colored_intervals.push_back(interval->GetHighInterval()); - } - } else if (interval->RequiresRegister()) { - // The interference graph is too dense to color. Make it sparser by - // splitting this live interval. - successful = false; - register_allocator_->SplitAtRegisterUses(interval); - // We continue coloring, because there may be additional intervals that cannot - // be colored, and that we should split. - } else { - // Spill. 
- node->SetNeedsSpillSlot(); - } - } - - // If unsuccessful, reset all register assignments. - if (!successful) { - for (LiveInterval* interval : colored_intervals) { - interval->ClearRegister(); - } - } - - return successful; -} - -void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { - // The register allocation resolver will organize the stack based on value type, - // so we assign stack slots for each value type separately. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); - ScopedArenaVector<LiveInterval*> double_intervals(adapter); - ScopedArenaVector<LiveInterval*> long_intervals(adapter); - ScopedArenaVector<LiveInterval*> float_intervals(adapter); - ScopedArenaVector<LiveInterval*> int_intervals(adapter); - - // The set of parent intervals already handled. - ScopedArenaSet<LiveInterval*> seen(adapter); - - // Find nodes that need spill slots. - for (InterferenceNode* node : nodes) { - if (!node->NeedsSpillSlot()) { - continue; - } - - LiveInterval* parent = node->GetInterval()->GetParent(); - if (seen.find(parent) != seen.end()) { - // We've already handled this interval. - // This can happen if multiple siblings of the same interval request a stack slot. - continue; - } - seen.insert(parent); - - HInstruction* defined_by = parent->GetDefinedBy(); - if (parent->HasSpillSlot()) { - // We already have a spill slot for this value that we can reuse. - } else if (defined_by->IsParameterValue()) { - // Parameters already have a stack slot. - parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); - } else if (defined_by->IsCurrentMethod()) { - // The current method is always at stack slot 0. - parent->SetSpillSlot(0); - } else if (defined_by->IsConstant()) { - // Constants don't need a spill slot. - } else { - // We need to find a spill slot for this interval. Place it in the correct - // worklist to be processed later. - switch (node->GetInterval()->GetType()) { - case DataType::Type::kFloat64: - double_intervals.push_back(parent); - break; - case DataType::Type::kInt64: - long_intervals.push_back(parent); - break; - case DataType::Type::kFloat32: - float_intervals.push_back(parent); - break; - case DataType::Type::kReference: - case DataType::Type::kInt32: - case DataType::Type::kUint16: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kBool: - case DataType::Type::kInt16: - int_intervals.push_back(parent); - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); - UNREACHABLE(); - } - } - } - - // Color spill slots for each value type. - ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); -} - -void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, - /* out */ size_t* num_stack_slots_used) { - // We cannot use the original interference graph here because spill slots are assigned to - // all of the siblings of an interval, whereas an interference node represents only a single - // sibling. 
So, we assign spill slots linear-scan-style by sorting all the interval endpoints - // by position, and assigning the lowest spill slot available when we encounter an interval - // beginning. We ignore lifetime holes for simplicity. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( - allocator.Adapter(kArenaAllocRegisterAllocator)); - - for (LiveInterval* parent_interval : intervals) { - DCHECK(parent_interval->IsParent()); - DCHECK(!parent_interval->HasSpillSlot()); - size_t start = parent_interval->GetStart(); - size_t end = parent_interval->GetLastSibling()->GetEnd(); - DCHECK_LT(start, end); - interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); - interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); - } - - // Sort by position. - // We explicitly ignore the third entry of each tuple (the interval pointer) in order - // to maintain determinism. - std::sort(interval_endpoints.begin(), interval_endpoints.end(), - [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, - const std::tuple<size_t, bool, LiveInterval*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); - for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { - // Extract information from the current tuple. - LiveInterval* parent_interval; - bool is_interval_beginning; - size_t position; - std::tie(position, is_interval_beginning, parent_interval) = *it; - size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); - - if (is_interval_beginning) { - DCHECK(!parent_interval->HasSpillSlot()); - DCHECK_EQ(position, parent_interval->GetStart()); - - // Find first available free stack slot(s). - size_t slot = 0; - for (; ; ++slot) { - bool found = true; - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - if (taken.IsBitSet(s)) { - found = false; - break; // failure - } - } - if (found) { - break; // success - } - } - - parent_interval->SetSpillSlot(slot); - - *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); - if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { - // The parallel move resolver requires that there be an even number of spill slots - // allocated for pair value types. - ++(*num_stack_slots_used); - } - - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - taken.SetBit(s); - } - } else { - DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); - DCHECK(parent_interval->HasSpillSlot()); - - // Free up the stack slot(s) used by this interval. - size_t slot = parent_interval->GetSpillSlot(); - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - DCHECK(taken.IsBitSet(s)); - taken.ClearBit(s); - } - } - } - DCHECK_EQ(taken.NumSetBits(), 0u); -} - -} // namespace art diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h deleted file mode 100644 index 0e10152049..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ -#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ - -#include "arch/instruction_set.h" -#include "base/arena_object.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "base/scoped_arena_containers.h" -#include "register_allocator.h" - -namespace art HIDDEN { - -class CodeGenerator; -class HBasicBlock; -class HGraph; -class HInstruction; -class HParallelMove; -class Location; -class SsaLivenessAnalysis; -class InterferenceNode; -struct CoalesceOpportunity; -enum class CoalesceKind; - -/** - * A graph coloring register allocator. - * - * The algorithm proceeds as follows: - * (1) Build an interference graph, where nodes represent live intervals, and edges represent - * interferences between two intervals. Coloring this graph with k colors is isomorphic to - * finding a valid register assignment with k registers. - * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are - * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a - * different color.) As we prune nodes from the graph, more nodes may drop below degree k, - * enabling further pruning. The key is to maintain the pruning order in a stack, so that we - * can color the nodes in the reverse order. - * When there are no more nodes with degree less than k, we start pruning alternate nodes based - * on heuristics. Since these nodes are not guaranteed a color, we are careful to - * prioritize nodes that require a register. We also prioritize short intervals, because - * short intervals cannot be split very much if coloring fails (see below). "Prioritizing" - * a node amounts to pruning it later, since it will have fewer interferences if we prune other - * nodes first. - * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign - * a node a color, we do one of two things: - * - If the node requires a register, we consider the current coloring attempt a failure. - * However, we split the node's live interval in order to make the interference graph - * sparser, so that future coloring attempts may succeed. - * - If the node does not require a register, we simply assign it a location on the stack. - * - * If iterative move coalescing is enabled, the algorithm also attempts to conservatively - * combine nodes in the graph that would prefer to have the same color. (For example, the output - * of a phi instruction would prefer to have the same register as at least one of its inputs.) - * There are several additional steps involved with this: - * - We look for coalesce opportunities by examining each live interval, a step similar to that - * used by linear scan when looking for register hints. - * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist - * of low degree nodes that have associated coalesce opportunities. Only when we run out of - * coalesce opportunities do we start pruning coalesce-associated nodes. 
- * - When pruning a node, if any nodes transition from high degree to low degree, we add - * associated coalesce opportunities to the worklist, since these opportunities may now succeed. - * - Whether two nodes can be combined is decided by two different heuristics--one used when - * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node. - * It is vital that we only combine two nodes if the node that remains is guaranteed to receive - * a color. This is because additionally spilling is more costly than failing to coalesce. - * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around - * to be used as last-chance register hints when coloring. If nothing else, we try to use - * caller-save registers before callee-save registers. - * - * A good reference for graph coloring register allocation is - * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition). - */ -class RegisterAllocatorGraphColor : public RegisterAllocator { - public: - RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis, - bool iterative_move_coalescing = true); - ~RegisterAllocatorGraphColor() override; - - void AllocateRegisters() override; - - bool Validate(bool log_fatal_on_failure) override; - - private: - // Collect all intervals and prepare for register allocation. - void ProcessInstructions(); - void ProcessInstruction(HInstruction* instruction); - - // If any inputs require specific registers, block those registers - // at the position of this instruction. - void CheckForFixedInputs(HInstruction* instruction); - - // If the output of an instruction requires a specific register, split - // the interval and assign the register to the first part. - void CheckForFixedOutput(HInstruction* instruction); - - // Add all applicable safepoints to a live interval. - // Currently depends on instruction processing order. - void AddSafepointsFor(HInstruction* instruction); - - // Collect all live intervals associated with the temporary locations - // needed by an instruction. - void CheckForTempLiveIntervals(HInstruction* instruction); - - // If a safe point is needed, add a synthesized interval to later record - // the number of live registers at this point. - void CheckForSafepoint(HInstruction* instruction); - - // Try to remove the SuspendCheck at function entry. Returns true if it was successful. - bool TryRemoveSuspendCheckEntry(HInstruction* instruction); - - // Split an interval, but only if `position` is inside of `interval`. - // Return either the new interval, or the original interval if not split. - static LiveInterval* TrySplit(LiveInterval* interval, size_t position); - - // To ensure every graph can be colored, split live intervals - // at their register defs and uses. This creates short intervals with low - // degree in the interference graph, which are prioritized during graph - // coloring. - void SplitAtRegisterUses(LiveInterval* interval); - - // If the given instruction is a catch phi, give it a spill slot. - void AllocateSpillSlotForCatchPhi(HInstruction* instruction); - - // Ensure that the given register cannot be allocated for a given range. 
- void BlockRegister(Location location, size_t start, size_t end); - void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); - - bool IsCallerSave(size_t reg, bool processing_core_regs); - - // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not - // assigned the same stack slot. - void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used); - - // Provide stack slots to nodes that need them. - void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes); - - // Whether iterative move coalescing should be performed. Iterative move coalescing - // improves code quality, but increases compile time. - const bool iterative_move_coalescing_; - - // Live intervals, split by kind (core and floating point). - // These should not contain high intervals, as those are represented by - // the corresponding low interval throughout register allocation. - ScopedArenaVector<LiveInterval*> core_intervals_; - ScopedArenaVector<LiveInterval*> fp_intervals_; - - // Intervals for temporaries, saved for special handling in the resolution phase. - ScopedArenaVector<LiveInterval*> temp_intervals_; - - // Safepoints, saved for special handling while processing instructions. - ScopedArenaVector<HInstruction*> safepoints_; - - // Interference nodes representing specific registers. These are "pre-colored" nodes - // in the interference graph. - ScopedArenaVector<InterferenceNode*> physical_core_nodes_; - ScopedArenaVector<InterferenceNode*> physical_fp_nodes_; - - // Allocated stack slot counters. - size_t num_int_spill_slots_; - size_t num_double_spill_slots_; - size_t num_float_spill_slots_; - size_t num_long_spill_slots_; - size_t catch_phi_spill_slot_counter_; - - // Number of stack slots needed for the pointer to the current method. - // This is 1 for 32-bit architectures, and 2 for 64-bit architectures. - const size_t reserved_art_method_slots_; - - // Number of stack slots needed for outgoing arguments. 
- const size_t reserved_out_slots_; - - friend class ColoringIteration; - - DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index fcdaa2d34f..ffa9937cc5 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -1208,8 +1208,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) { LiveInterval* interval = phi->GetLiveInterval(); HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) + DCHECK(previous_phi == nullptr || previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent."; if (phi->IsVRegEquivalentOf(previous_phi)) { diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index d316aa5dc2..0d2d20682d 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -84,7 +84,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\ test_name(Strategy::kRegisterAllocatorLinearScan);\ }\ -TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ +/* Note: Graph coloring register allocator has been removed, so the test is DISABLED. */ \ +TEST_F(RegisterAllocatorTest, DISABLED_##test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 116f52605e..1cdc98a8be 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -490,9 +490,9 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition( DCHECK(instruction != nullptr); if (instruction->IsIf()) { - condition = instruction->AsIf()->InputAt(0)->AsCondition(); + condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull(); } else if (instruction->IsSelect()) { - condition = instruction->AsSelect()->GetCondition()->AsCondition(); + condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull(); } SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr; diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 3f931c4c49..53ad2b12c0 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -610,7 +610,7 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte } } -void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. 
last_visited_internal_latency_ = kArmNopLatency; @@ -618,11 +618,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* } void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( - HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddressIndex*) { UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; } -void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) { last_visited_latency_ = kArmMulIntegerLatency; } @@ -806,7 +806,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { } } -void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) { last_visited_internal_latency_ = kArmIntegerOpLatency; // Users do not use any data results. last_visited_latency_ = 0; @@ -866,22 +866,22 @@ void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); } -void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) { last_visited_internal_latency_ = kArmCallInternalLatency; last_visited_latency_ = kArmIntegerOpLatency; } -void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) { last_visited_internal_latency_ = kArmCallInternalLatency; last_visited_latency_ = kArmCallLatency; } -void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) { last_visited_internal_latency_ = kArmLoadStringInternalLatency; last_visited_latency_ = kArmMemoryLoadLatency; } -void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) { last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency; last_visited_latency_ = kArmCallLatency; } diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 0da21c187f..cedc12a2be 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -53,7 +53,7 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {} // Default visitor for instructions not handled specifically below. 
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { + void VisitInstruction([[maybe_unused]] HInstruction*) override { last_visited_latency_ = kArmIntegerOpLatency; } diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 3071afd951..5113cf446d 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -30,30 +30,30 @@ void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr } void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight( - HBitwiseNegatedRight* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HBitwiseNegatedRight*) { last_visited_latency_ = kArm64IntegerOpLatency; } void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp( - HDataProcWithShifterOp* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDataProcWithShifterOp*) { last_visited_latency_ = kArm64DataProcWithShifterOpLatency; } void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( - HIntermediateAddress* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddress*) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. last_visited_latency_ = kArm64IntegerOpLatency + 2; } void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex( - HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddressIndex* instr) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2; } -void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) { last_visited_latency_ = kArm64MulIntegerLatency; } @@ -65,15 +65,15 @@ void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitArrayLength([[maybe_unused]] HArrayLength*) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitArraySet([[maybe_unused]] HArraySet*) { last_visited_latency_ = kArm64MemoryStoreLatency; } -void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) { last_visited_internal_latency_ = kArm64IntegerOpLatency; // Users do not use any data results. 
last_visited_latency_ = 0; @@ -113,21 +113,21 @@ void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) { } } -void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet([[maybe_unused]] HInstanceFieldGet*) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInstanceOf([[maybe_unused]] HInstanceOf*) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64IntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInvoke([[maybe_unused]] HInvoke*) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } -void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitLoadString([[maybe_unused]] HLoadString*) { last_visited_internal_latency_ = kArm64LoadStringInternalLatency; last_visited_latency_ = kArm64MemoryLoadLatency; } @@ -138,7 +138,7 @@ void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) { : kArm64MulIntegerLatency; } -void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitNewArray([[maybe_unused]] HNewArray*) { last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } @@ -181,7 +181,7 @@ void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) { } } -void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitStaticFieldGet([[maybe_unused]] HStaticFieldGet*) { last_visited_latency_ = kArm64MemoryLoadLatency; } @@ -211,7 +211,7 @@ void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *in } void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( - HVecReplicateScalar* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HVecReplicateScalar* instr) { last_visited_latency_ = kArm64SIMDReplicateOpLatency; } @@ -223,7 +223,7 @@ void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) { HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecCnv([[maybe_unused]] HVecCnv* instr) { last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency; } @@ -279,19 +279,19 @@ void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) { HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecAnd([[maybe_unused]] HVecAnd* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecAndNot([[maybe_unused]] HVecAndNot* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecOr([[maybe_unused]] HVecOr* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecXor([[maybe_unused]] 
HVecXor* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } @@ -312,13 +312,12 @@ void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { } void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( - HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HVecMultiplyAccumulate* instr) { last_visited_latency_ = kArm64SIMDMulIntegerLatency; } -void SchedulingLatencyVisitorARM64::HandleVecAddress( - HVecMemoryOperation* instruction, - size_t size ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction, + [[maybe_unused]] size_t size) { HInstruction* index = instruction->InputAt(1); if (!index->IsConstant()) { last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency; diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index ec41577e9d..7ce00e00ab 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -59,7 +59,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. - void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { + void VisitInstruction([[maybe_unused]] HInstruction*) override { last_visited_latency_ = kArm64IntegerOpLatency; } diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 6a10440d11..07065efbb7 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -46,8 +46,7 @@ static bool IsSimpleBlock(HBasicBlock* block) { } else if (instruction->CanBeMoved() && !instruction->HasSideEffects() && !instruction->CanThrow()) { - if (instruction->IsSelect() && - instruction->AsSelect()->GetCondition()->GetBlock() == block) { + if (instruction->IsSelect() && instruction->AsSelect()->GetCondition()->GetBlock() == block) { // Count one HCondition and HSelect in the same block as a single instruction. // This enables finding nested selects. continue; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index a658252e69..2179bf50b5 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -604,7 +604,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { */ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { // We place the floating point constant next to this constant. - HFloatConstant* result = constant->GetNext()->AsFloatConstant(); + HFloatConstant* result = constant->GetNext()->AsFloatConstantOrNull(); if (result == nullptr) { float value = bit_cast<float, int32_t>(constant->GetValue()); result = new (graph_->GetAllocator()) HFloatConstant(value); @@ -626,7 +626,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { */ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { // We place the floating point constant next to this constant. - HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); + HDoubleConstant* result = constant->GetNext()->AsDoubleConstantOrNull(); if (result == nullptr) { double value = bit_cast<double, int64_t>(constant->GetValue()); result = new (graph_->GetAllocator()) HDoubleConstant(value); @@ -652,16 +652,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType:: // We place the floating point /reference phi next to this phi. 
HInstruction* next = phi->GetNext(); - if (next != nullptr - && next->AsPhi()->GetRegNumber() == phi->GetRegNumber() - && next->GetType() != type) { + if (next != nullptr && + next->AsPhi()->GetRegNumber() == phi->GetRegNumber() && + next->GetType() != type) { // Move to the next phi to see if it is the one we are looking for. next = next->GetNext(); } - if (next == nullptr - || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) - || (next->GetType() != type)) { + if (next == nullptr || + (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) || + (next->GetType() != type)) { ArenaAllocator* allocator = graph_->GetAllocator(); HInputsRef inputs = phi->GetInputs(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 2df0f34c7d..18c945381d 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -31,6 +31,7 @@ namespace art HIDDEN { class SsaLivenessAnalysisTest : public OptimizingUnitTest { protected: void SetUp() override { + TEST_SETUP_DISABLED_FOR_RISCV64(); OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default"); @@ -42,6 +43,11 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { graph_->SetEntryBlock(entry_); } + void TearDown() override { + TEST_TEARDOWN_DISABLED_FOR_RISCV64(); + OptimizingUnitTest::TearDown(); + } + protected: HBasicBlock* CreateSuccessor(HBasicBlock* block) { HGraph* graph = block->GetGraph(); @@ -58,6 +64,7 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { }; TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* arg = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry_->AddInstruction(arg); @@ -78,6 +85,7 @@ TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { } TEST_F(SsaLivenessAnalysisTest, TestAput) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); HInstruction* index = new (GetAllocator()) HParameterValue( @@ -147,6 +155,7 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { } TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); HInstruction* index = new (GetAllocator()) HParameterValue( diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index ce343dffec..1d9be3956a 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -76,7 +76,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist.back(); worklist.pop_back(); for (HInstruction* raw_input : phi->GetInputs()) { - HPhi* input = raw_input->AsPhi(); + HPhi* input = raw_input->AsPhiOrNull(); if (input != nullptr && input->IsDead()) { // Input is a dead phi. Revive it and add to the worklist. We make sure // that the phi was not dead initially (see definition of `initially_live`). 
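Two mechanical idioms recur throughout the hunks above: the GCC-style `ATTRIBUTE_UNUSED` macro is replaced by the standard C++17 `[[maybe_unused]]` attribute, and bare downcasts such as `AsPhi()` / `AsFloatConstant()` become the explicitly nullable `AsPhiOrNull()` / `AsFloatConstantOrNull()`, whose result is tested before use. The sketch below illustrates both idioms with simplified stand-in types, not the real ART node classes:

// Minimal illustration of the two idioms above (hypothetical stand-in types).
struct HInstruction {
  virtual ~HInstruction() = default;
  virtual bool IsPhi() const { return false; }
};

struct HPhi : HInstruction {
  bool IsPhi() const override { return true; }
};

// `As*OrNull()` style: returns nullptr when the instruction is not of the
// requested kind, so callers must check before dereferencing.
HPhi* AsPhiOrNull(HInstruction* insn) {
  return insn->IsPhi() ? static_cast<HPhi*>(insn) : nullptr;
}

// `[[maybe_unused]]` replaces the old ATTRIBUTE_UNUSED macro on parameters
// that exist only to satisfy a visitor signature.
void VisitInstruction([[maybe_unused]] HInstruction* instruction) {
  // Fixed-cost default; the parameter is deliberately ignored.
}

void Example(HInstruction* raw_input) {
  HPhi* phi = AsPhiOrNull(raw_input);
  if (phi != nullptr) {
    // Only reached when the downcast succeeded.
  }
}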
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1a368ed347..2ecda7610e 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -51,7 +51,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t fp_spill_mask, uint32_t num_dex_registers, bool baseline, - bool debuggable) { + bool debuggable, + bool has_should_deoptimize_flag) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -63,6 +64,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, num_dex_registers_ = num_dex_registers; baseline_ = baseline; debuggable_ = debuggable; + has_should_deoptimize_flag_ = has_should_deoptimize_flag; if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -152,8 +154,10 @@ void StackMapStream::BeginStackMapEntry( // Create lambda method, which will be executed at the very end to verify data. // Parameters and local variables will be captured(stored) by the lambda "[=]". dchecks_.emplace_back([=](const CodeInfo& code_info) { + // The `native_pc_offset` may have been overridden using `SetStackMapNativePcOffset(.)`. + uint32_t final_native_pc_offset = GetStackMapNativePcOffset(stack_map_index); if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) { - StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, + StackMap stack_map = code_info.GetStackMapForNativePcOffset(final_native_pc_offset, instruction_set_); CHECK_EQ(stack_map.Row(), stack_map_index); } else if (kind == StackMap::Kind::Catch) { @@ -162,7 +166,7 @@ void StackMapStream::BeginStackMapEntry( CHECK_EQ(stack_map.Row(), stack_map_index); } StackMap stack_map = code_info.GetStackMapAt(stack_map_index); - CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset); + CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), final_native_pc_offset); CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind)); CHECK_EQ(stack_map.GetDexPc(), dex_pc); CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask); @@ -374,10 +378,12 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; - uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; + uint32_t flags = 0; + flags |= (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; flags |= baseline_ ? CodeInfo::kIsBaseline : 0; flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0; - DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte. + flags |= has_should_deoptimize_flag_ ? CodeInfo::kHasShouldDeoptimizeFlag : 0; + uint32_t bit_table_flags = 0; ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { if (bit_table->size() != 0) { // Record which bit-tables are stored. @@ -409,6 +415,8 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); CHECK_EQ(CodeInfo::HasInlineInfo(buffer.data()), inline_infos_.size() > 0); CHECK_EQ(CodeInfo::IsBaseline(buffer.data()), baseline_); + CHECK_EQ(CodeInfo::IsDebuggable(buffer.data()), debuggable_); + CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(buffer.data()), has_should_deoptimize_flag_); // Verify all written data (usually only in debug builds). 
if (kVerifyStackMaps) { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 643af2da94..f027850ce6 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -66,7 +66,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { size_t fp_spill_mask, uint32_t num_dex_registers, bool baseline, - bool debuggable); + bool debuggable, + bool has_should_deoptimize_flag = false); void EndMethod(size_t code_size); void BeginStackMapEntry( @@ -129,8 +130,9 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { uint32_t core_spill_mask_ = 0; uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; - bool baseline_; - bool debuggable_; + bool baseline_ = false; + bool debuggable_ = false; + bool has_should_deoptimize_flag_ = false; BitTableBuilder<StackMap> stack_maps_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index e266618980..d86869ce0f 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -33,7 +33,7 @@ class MemoryOperandVisitor final : public HGraphVisitor { private: void VisitBoundsCheck(HBoundsCheck* check) override { // Replace the length by the array itself, so that we can do compares to memory. - HArrayLength* array_len = check->InputAt(1)->AsArrayLength(); + HArrayLength* array_len = check->InputAt(1)->AsArrayLengthOrNull(); // We only want to replace an ArrayLength. if (array_len == nullptr) { diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index a122d3c9d3..d9f56629ef 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -28,6 +28,10 @@ #include "utils/arm64/assembler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "utils/riscv64/assembler_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "utils/x86/assembler_x86.h" #endif @@ -57,9 +61,6 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( ArmVIXLAssembler assembler(allocator); switch (abi) { - case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI. - ___ Ldr(pc, MemOperand(r0, offset.Int32Value())); - break; case kJniAbi: { // Load via Thread* held in JNIEnv* in first argument (R0). vixl::aarch32::UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); const vixl::aarch32::Register temp_reg = temps.Acquire(); @@ -78,7 +79,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } @@ -95,11 +96,6 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( Arm64Assembler assembler(allocator); switch (abi) { - case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI. - __ JumpTo(Arm64ManagedRegister::FromXRegister(X0), Offset(offset.Int32Value()), - Arm64ManagedRegister::FromXRegister(IP1)); - - break; case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (X0). 
__ LoadRawPtr(Arm64ManagedRegister::FromXRegister(IP1), Arm64ManagedRegister::FromXRegister(X0), @@ -120,13 +116,47 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } } // namespace arm64 #endif // ART_ENABLE_CODEGEN_arm64 +#ifdef ART_ENABLE_CODEGEN_riscv64 +namespace riscv64 { +static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* allocator, + EntryPointCallingConvention abi, + ThreadOffset64 offset) { + Riscv64Assembler assembler(allocator); + ScratchRegisterScope srs(&assembler); + XRegister tmp = srs.AllocateXRegister(); + + switch (abi) { + case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (A0). + __ Loadd(tmp, + A0, + JNIEnvExt::SelfOffset(static_cast<size_t>(kRiscv64PointerSize)).Int32Value()); + __ Loadd(tmp, tmp, offset.Int32Value()); + __ Jr(tmp); + break; + case kQuickAbi: // TR holds Thread*. + __ Loadd(tmp, TR, offset.Int32Value()); + __ Jr(tmp); + break; + } + + __ FinalizeCode(); + size_t cs = __ CodeSize(); + std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); + MemoryRegion code(entry_stub->data(), entry_stub->size()); + __ CopyInstructions(code); + + return std::move(entry_stub); +} +} // namespace riscv64 +#endif // ART_ENABLE_CODEGEN_riscv64 + #ifdef ART_ENABLE_CODEGEN_x86 namespace x86 { static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* allocator, @@ -141,7 +171,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } @@ -162,7 +192,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } @@ -179,6 +209,10 @@ std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet is case InstructionSet::kArm64: return arm64::CreateTrampoline(&allocator, abi, offset); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return riscv64::CreateTrampoline(&allocator, abi, offset); +#endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: return x86_64::CreateTrampoline(&allocator, offset); diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h index 32e35ae1d6..25b715fab0 100644 --- a/compiler/trampolines/trampoline_compiler.h +++ b/compiler/trampolines/trampoline_compiler.h @@ -28,8 +28,6 @@ namespace art HIDDEN { enum EntryPointCallingConvention { - // ABI of invocations to a method's interpreter entry point. - kInterpreterAbi, // ABI of calls to a method's native code, only used for native methods. kJniAbi, // ABI of calls to a method's quick code entry point. 
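Each trampoline writer above follows the same shape regardless of ISA: emit the ABI-specific load-and-jump, call FinalizeCode(), size a buffer from CodeSize(), and copy the bytes out with the renamed CopyInstructions() (which, per the assembler changes further down, no longer applies fixups itself). Below is a minimal sketch of that two-phase flow using a toy buffer type rather than the real AssemblerBuffer/MemoryRegion classes:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Toy stand-in for the assembler interface used above: emit bytes, finalize
// (apply fixups in the assembler's own buffer), then copy the finished bytes out.
class ToyAssembler {
 public:
  void Emit8(uint8_t byte) { buffer_.push_back(byte); }

  // Mirrors FinalizeCode(): slow paths and fixups are handled here,
  // before anything leaves the assembler.
  void FinalizeCode() { /* fixups would be patched into buffer_ at this point */ }

  std::size_t CodeSize() const { return buffer_.size(); }

  // Mirrors the renamed CopyInstructions(): a plain copy, no patching.
  void CopyInstructions(uint8_t* region, std::size_t size) const {
    std::memcpy(region, buffer_.data(), std::min(size, buffer_.size()));
  }

 private:
  std::vector<uint8_t> buffer_;
};

// Usage pattern matching the trampoline writers above.
std::vector<uint8_t> BuildStub() {
  ToyAssembler assembler;
  assembler.Emit8(0x13);     // illustrative byte only (low byte of a RISC-V NOP)
  assembler.FinalizeCode();  // 1) finalize in place
  std::vector<uint8_t> stub(assembler.CodeSize());
  assembler.CopyInstructions(stub.data(), stub.size());  // 2) copy out
  return stub;
}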
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index c7ca003530..d64de09501 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -52,7 +52,7 @@ const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const { return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } -void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) { +void ArmVIXLAssembler::CopyInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 741119d7f7..50dc06fefc 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -173,6 +173,30 @@ class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler { } } using MacroAssembler::Vmov; + + // TODO(b/281982421): Move the implementation of Mrrc to vixl and remove this implementation. + void Mrrc(vixl32::Register r1, vixl32::Register r2, int coproc, int opc1, int crm) { + // See ARM A-profile A32/T32 Instruction set architecture + // https://developer.arm.com/documentation/ddi0597/2022-09/Base-Instructions/MRRC--Move-to-two-general-purpose-registers-from-System-register- + CHECK(coproc == 15 || coproc == 14); + if (IsUsingT32()) { + uint32_t inst = (0b111011000101 << 20) | + (r2.GetCode() << 16) | + (r1.GetCode() << 12) | + (coproc << 8) | + (opc1 << 4) | + crm; + EmitT32_32(inst); + } else { + uint32_t inst = (0b000011000101 << 20) | + (r2.GetCode() << 16) | + (r1.GetCode() << 12) | + (coproc << 8) | + (opc1 << 4) | + crm; + EmitA32(inst); + } + } }; class ArmVIXLAssembler final : public Assembler { @@ -194,12 +218,12 @@ class ArmVIXLAssembler final : public Assembler { const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. 
- void FinalizeInstructions(const MemoryRegion& region) override; + void CopyInstructions(const MemoryRegion& region) override; - void Bind(Label* label ATTRIBUTE_UNUSED) override { + void Bind([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM"; } - void Jump(Label* label ATTRIBUTE_UNUSED) override { + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM"; } diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 54873454eb..7a887fa064 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -344,13 +344,13 @@ void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs } } -void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; } -void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; } @@ -720,7 +720,7 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, ManagedRegister msrc, - size_t size ATTRIBUTE_UNUSED) { + [[maybe_unused]] size_t size) { ArmManagedRegister dst = mdst.AsArm(); if (kIsDebugBuild) { // Check that the destination is not a scratch register. @@ -861,13 +861,13 @@ void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister ___ Ldr(reg, MemOperand(reg)); } -void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, - bool could_be_null ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister src, + [[maybe_unused]] bool could_be_null) { // TODO: not validating references. } -void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, - bool could_be_null ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, + [[maybe_unused]] bool could_be_null) { // TODO: not validating references. } diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 26dce7c502..13acc7c852 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -79,7 +79,7 @@ const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } -void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { +void Arm64Assembler::CopyInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. 
MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index f8168903bd..ad6a8edadf 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -91,7 +91,7 @@ class Arm64Assembler final : public Assembler { const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. - void FinalizeInstructions(const MemoryRegion& region) override; + void CopyInstructions(const MemoryRegion& region) override; void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs); @@ -145,10 +145,10 @@ class Arm64Assembler final : public Assembler { // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction. void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0); - void Bind(Label* label ATTRIBUTE_UNUSED) override { + void Bind([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM64"; } - void Jump(Label* label ATTRIBUTE_UNUSED) override { + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM64"; } diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 9e9f122cf6..c5380695d9 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -705,7 +705,7 @@ void Arm64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m } void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( - JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { + JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>(); @@ -734,8 +734,8 @@ void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( void Arm64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( JNIMacroLabel* label, - ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED, - ManagedRegister return_reg ATTRIBUTE_UNUSED) { + [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs, + [[maybe_unused]] ManagedRegister return_reg) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>(); diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b82f0dc4b4..1c04a3d20b 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -57,18 +57,21 @@ void AssemblerBuffer::ProcessFixups(const MemoryRegion& region) { fixup->Process(region, fixup->position()); fixup = fixup->previous(); } +#ifndef NDEBUG + fixups_processed_ = true; +#endif +} + + +void AssemblerBuffer::ProcessFixups() { + MemoryRegion from(reinterpret_cast<void*>(contents()), Size()); + ProcessFixups(from); } -void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) { - // Copy the instructions from the buffer. 
+void AssemblerBuffer::CopyInstructions(const MemoryRegion& instructions) { MemoryRegion from(reinterpret_cast<void*>(contents()), Size()); instructions.CopyFrom(0, from); - // Process fixups in the instructions. - ProcessFixups(instructions); -#ifndef NDEBUG - fixups_processed_ = true; -#endif } diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 13a5d9fd01..f3fa711dbb 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -163,9 +163,8 @@ class AssemblerBuffer { uint8_t* contents() const { return contents_; } - // Copy the assembled instructions into the specified memory block - // and apply all fixups. - void FinalizeInstructions(const MemoryRegion& region); + // Copy the assembled instructions into the specified memory block. + void CopyInstructions(const MemoryRegion& region); // To emit an instruction to the assembler buffer, the EnsureCapacity helper // must be used to guarantee that the underlying data area is big enough to @@ -246,6 +245,8 @@ class AssemblerBuffer { // The provided `min_capacity` must be higher than current `Capacity()`. void ExtendCapacity(size_t min_capacity); + void ProcessFixups(); + private: // The limit is set to kMinimumGap bytes before the end of the data area. // This leaves enough space for the longest possible instruction and allows @@ -357,7 +358,10 @@ class DebugFrameOpCodeWriterForAssembler final class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { public: // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. - virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } + virtual void FinalizeCode() { + buffer_.EmitSlowPaths(this); + buffer_.ProcessFixups(); + } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } @@ -375,12 +379,12 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual size_t CodePosition() { return CodeSize(); } // Copy instructions out of assembly buffer into the given region of memory - virtual void FinalizeInstructions(const MemoryRegion& region) { - buffer_.FinalizeInstructions(region); + virtual void CopyInstructions(const MemoryRegion& region) { + buffer_.CopyInstructions(region); } // TODO: Implement with disassembler. - virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {} + virtual void Comment([[maybe_unused]] const char* format, ...) 
{} virtual void Bind(Label* label) = 0; virtual void Jump(Label* label) = 0; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index d03e5a7abc..72f48367a6 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,6 +26,7 @@ #include <fstream> #include <iterator> +#include "base/array_ref.h" #include "base/macros.h" #include "base/malloc_arena_pool.h" #include "assembler_test_base.h" @@ -200,8 +201,8 @@ class AssemblerTest : public AssemblerTestBase { template <typename Reg1, typename Reg2, typename ImmType> std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType), int imm_bits, - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, @@ -215,48 +216,28 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); + (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, bias, multiplier, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm * multiplier + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2, typename Reg3, typename ImmType> std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, Reg3, ImmType), int imm_bits, - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - const std::vector<Reg3*> reg3_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), @@ -271,53 +252,28 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias); + (assembler_.get()->*f)(reg1, reg2, reg3, new_imm + bias); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { - base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename ImmType, typename Reg1, typename Reg2> std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), int imm_bits, @@ -332,46 +288,26 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(new_imm, *reg1, *reg2); + (assembler_.get()->*f)(new_imm, reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. - str += "\n"; return str; } template <typename RegType, typename ImmType> std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), int imm_bits, - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt, int bias) { @@ -382,36 +318,148 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg, new_imm + bias); + (assembler_.get()->*f)(reg, new_imm + bias); } std::string base = fmt; - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + str += base; + str += "\n"; + } + } + return str; + } + + template <typename RegType, typename ImmType> + std::string RepeatTemplatedRegisterImmBitsShift( + void (Ass::*f)(RegType, ImmType), + int imm_bits, + int shift, + ArrayRef<const RegType> registers, + std::string (AssemblerTest::*GetName)(const RegType&), + const std::string& fmt, + int bias) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + for (auto reg : registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(reg, new_imm + bias); } + std::string base = fmt; + + 
ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); + + str += base; + str += "\n"; + } + } + return str; + } + + template <typename ImmType> + std::string RepeatTemplatedImmBitsShift( + void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + WarnOnCombinations(imms.size()); + + std::string str; - if (str.size() > 0) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(new_imm + bias); + } + std::string base = fmt; + + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); + + str += base; + str += "\n"; + } + return str; + } + + template <typename Reg1, typename Reg2, typename ImmType> + std::string RepeatTemplatedRegistersImmBitsShift( + void (Ass::*f)(Reg1, Reg2, ImmType), + int imm_bits, + int shift, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias); + } + std::string base = fmt; + + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, bias, multiplier, &base); + + str += base; str += "\n"; } - str += base; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename ImmType> + std::string RepeatIbS( + void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + return RepeatTemplatedImmBitsShift<ImmType>(f, imm_bits, shift, fmt, bias); + } + + template <typename ImmType> + std::string RepeatRIbS( + void (Ass::*f)(Reg, ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + return RepeatTemplatedRegisterImmBitsShift<Reg, ImmType>( + f, + imm_bits, + shift, + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatRRIbS(void (Ass::*f)(Reg, Reg, ImmType), + int imm_bits, + int shift, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBitsShift<Reg, Reg, ImmType>( + f, + imm_bits, + shift, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); + } + + template <typename ImmType> std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt, @@ -488,6 +536,19 @@ class AssemblerTest : public AssemblerTestBase { fmt); } + std::string RepeatFFFF(void (Ass::*f)(FPReg, FPReg, FPReg, FPReg), const std::string& fmt) { + return RepeatTemplatedRegisters<FPReg, FPReg, FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, FPReg, Reg>( f, @@ -538,6 +599,32 @@ class AssemblerTest : public AssemblerTestBase { fmt); } + std::string RepeatRFF(void (Ass::*f)(Reg, FPReg, FPReg), const std::string& fmt) { + return RepeatTemplatedRegisters<Reg, FPReg, FPReg>( + f, + GetRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + + template <typename ImmType> + std::string RepeatRFIb(void (Ass::*f)(Reg, FPReg, ImmType), + int imm_bits, + const std::string& fmt) { + return RepeatTemplatedRegistersImmBits<Reg, FPReg, ImmType>( + f, + imm_bits, + GetRegisters(), + GetFPRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetFPRegName, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -590,21 +677,11 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. - str += "\n"; return str; } @@ -710,36 +787,36 @@ class AssemblerTest : public AssemblerTestBase { // Returns a vector of registers used by any of the repeat methods // involving an "R" (e.g. RepeatR). 
- virtual std::vector<Reg*> GetRegisters() = 0; + virtual ArrayRef<const Reg> GetRegisters() = 0; // Returns a vector of fp-registers used by any of the repeat methods // involving an "F" (e.g. RepeatFF). - virtual std::vector<FPReg*> GetFPRegisters() { + virtual ArrayRef<const FPReg> GetFPRegisters() { UNIMPLEMENTED(FATAL) << "Architecture does not support floating-point registers"; UNREACHABLE(); } // Returns a vector of dedicated simd-registers used by any of the repeat // methods involving an "V" (e.g. RepeatVV). - virtual std::vector<VecReg*> GetVectorRegisters() { + virtual ArrayRef<const VecReg> GetVectorRegisters() { UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; UNREACHABLE(); } // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. - virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetSecondaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; UNREACHABLE(); } // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems. - virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetTertiaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers"; UNREACHABLE(); } // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems. - virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetQuaternaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers"; UNREACHABLE(); } @@ -818,7 +895,9 @@ class AssemblerTest : public AssemblerTestBase { const int kMaxBitsExhaustiveTest = 8; // Create a couple of immediate values up to the number of bits given. 
- virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, bool as_uint = false) { + virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, + bool as_uint = false, + int shift = 0) { CHECK_GT(imm_bits, 0); CHECK_LE(imm_bits, 64); std::vector<int64_t> res; @@ -826,11 +905,11 @@ class AssemblerTest : public AssemblerTestBase { if (imm_bits <= kMaxBitsExhaustiveTest) { if (as_uint) { for (uint64_t i = MinInt<uint64_t>(imm_bits); i <= MaxInt<uint64_t>(imm_bits); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } } else { for (int64_t i = MinInt<int64_t>(imm_bits); i <= MaxInt<int64_t>(imm_bits); i++) { - res.push_back(i); + res.push_back(i << shift); } } } else { @@ -838,14 +917,14 @@ class AssemblerTest : public AssemblerTestBase { for (uint64_t i = MinInt<uint64_t>(kMaxBitsExhaustiveTest); i <= MaxInt<uint64_t>(kMaxBitsExhaustiveTest); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } for (int i = 0; i <= imm_bits; i++) { uint64_t j = (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1) + ((MaxInt<uint64_t>(imm_bits) - (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1)) * i / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } } else { for (int i = 0; i <= imm_bits; i++) { @@ -853,18 +932,18 @@ class AssemblerTest : public AssemblerTestBase { ((((MinInt<int64_t>(kMaxBitsExhaustiveTest) - 1) - MinInt<int64_t>(imm_bits)) * i) / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } for (int64_t i = MinInt<int64_t>(kMaxBitsExhaustiveTest); i <= MaxInt<int64_t>(kMaxBitsExhaustiveTest); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } for (int i = 0; i <= imm_bits; i++) { int64_t j = (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1) + ((MaxInt<int64_t>(imm_bits) - (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1)) * i / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } } } @@ -1111,19 +1190,11 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. - str += "\n"; return str; } @@ -1144,34 +1215,19 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename RegType, typename AddrType> std::string RepeatTemplatedRegMem(void (Ass::*f)(RegType, const AddrType&), - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, const std::vector<AddrType> addresses, std::string (AssemblerTest::*GetRName)(const RegType&), std::string (AssemblerTest::*GetAName)(const AddrType&), @@ -1181,37 +1237,24 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg : registers) { for (auto addr : addresses) { if (f != nullptr) { - (assembler_.get()->*f)(*reg, addr); + (assembler_.get()->*f)(reg, addr); } std::string base = fmt; - std::string reg_string = (this->*GetRName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } - - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base); + ReplaceAddr((this->*GetAName)(addr), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } template <typename AddrType, typename RegType> std::string RepeatTemplatedMemReg(void (Ass::*f)(const AddrType&, RegType), const std::vector<AddrType> addresses, - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetAName)(const AddrType&), std::string (AssemblerTest::*GetRName)(const RegType&), const std::string& fmt) { @@ -1220,30 +1263,17 @@ class AssemblerTest : public AssemblerTestBase { for (auto addr : addresses) { for (auto reg : registers) { if (f != nullptr) { - (assembler_.get()->*f)(addr, *reg); + (assembler_.get()->*f)(addr, reg); } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); + ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base); - std::string reg_string = (this->*GetRName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } @@ -1253,36 +1283,28 @@ class AssemblerTest : public AssemblerTestBase { template <typename RegType> std::string RepeatTemplatedRegister(void (Ass::*f)(RegType), - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt) { std::string str; for (auto reg : registers) { if (f != nullptr) { - (assembler_.get()->*f)(*reg); + (assembler_.get()->*f)(reg); } std::string base = fmt; - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, @@ -1294,44 +1316,31 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { // Check if this register pair is on the exception list. If so, skip it. if (except != nullptr) { - const auto& pair = std::make_pair(*reg1, *reg2); + const auto& pair = std::make_pair(reg1, reg2); if (std::find(except->begin(), except->end(), pair) != except->end()) { continue; } } if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2); + (assembler_.get()->*f)(reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersNoDupes(void (Ass::*f)(Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt) { @@ -1342,38 +1351,25 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { if (reg1 == reg2) continue; if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2); + (assembler_.get()->*f)(reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2, typename Reg3> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - const std::vector<Reg3*> reg3_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), @@ -1383,44 +1379,61 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { for (auto reg3 : reg3_registers) { if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, *reg3); + (assembler_.get()->*f)(reg1, reg2, reg3); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + str += base; + str += "\n"; + } + } + } + return str; + } - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { - base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } + template <typename Reg1, typename Reg2, typename Reg3, typename Reg4> + std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3, Reg4), + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, + ArrayRef<const Reg4> reg4_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + std::string (AssemblerTest::*GetName3)(const Reg3&), + std::string (AssemblerTest::*GetName4)(const Reg4&), + const std::string& fmt) { + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (auto reg3 : reg3_registers) { + for (auto reg4 : reg4_registers) { + if (f != nullptr) { + (assembler_.get()->*f)(reg1, reg2, reg3, reg4); + } + std::string base = fmt; - if (str.size() > 0) { + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); + ReplaceReg(REG4_TOKEN, (this->*GetName4)(reg4), &base); + + str += base; str += "\n"; } - str += base; } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), size_t imm_bytes, @@ -1434,39 +1447,19 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { Imm new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, new_imm); + (assembler_.get()->*f)(reg1, reg2, new_imm); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. - str += "\n"; return str; } @@ -1517,11 +1510,41 @@ class AssemblerTest : public AssemblerTestBase { } } + static void ReplaceReg(const std::string& reg_token, + const std::string& replacement, + /*inout*/ std::string* str) { + size_t reg_index; + while ((reg_index = str->find(reg_token)) != std::string::npos) { + str->replace(reg_index, reg_token.length(), replacement); + } + } + + static void ReplaceImm(int64_t imm, + int64_t bias, + int64_t multiplier, + /*inout*/ std::string* str) { + size_t imm_index = str->find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm * multiplier + bias; + std::string imm_string = sreg.str(); + str->replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + } + + static void ReplaceAddr(const std::string& replacement, /*inout*/ std::string* str) { + size_t addr_index; + if ((addr_index = str->find(ADDRESS_TOKEN)) != std::string::npos) { + str->replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), replacement); + } + } + static constexpr const char* ADDRESS_TOKEN = "{mem}"; static constexpr const char* REG_TOKEN = "{reg}"; static constexpr const char* REG1_TOKEN = "{reg1}"; static constexpr const char* REG2_TOKEN = "{reg2}"; static constexpr const char* REG3_TOKEN = "{reg3}"; + static constexpr const char* REG4_TOKEN = "{reg4}"; static constexpr const char* IMM_TOKEN = "{imm}"; private: @@ -1529,7 +1552,7 @@ class AssemblerTest : public AssemblerTestBase { std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) { - const std::vector<Reg*> registers = GetRegisters(); + ArrayRef<const Reg> registers = GetRegisters(); std::string str; std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); @@ -1539,45 +1562,29 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { Imm new_imm = 
CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg, new_imm); + (assembler_.get()->*f)(reg, new_imm); } std::string base = fmt; - std::string reg_string = GetRegName<kRegView>(*reg); - size_t reg_index; - while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, GetRegName<kRegView>(reg), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } // Override this to pad the code with NOPs to a certain size if needed. - virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { - } + virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {} void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); + assembler_->CopyInstructions(code); Pad(*data); Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 73f3657413..6f836d3718 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -141,6 +141,16 @@ class AssemblerTestBase : public testing::Test { virtual std::vector<std::string> GetAssemblerCommand() { InstructionSet isa = GetIsa(); switch (isa) { + case InstructionSet::kRiscv64: + // TODO(riscv64): Support compression (RV32C) in assembler and tests (add `c` to `-march=`). + return {FindTool("clang"), + "--compile", + "-target", + "riscv64-linux-gnu", + "-march=rv64imafd_zba_zbb", + // Force the assembler to fully emit branch instructions instead of leaving + // offsets unresolved with relocation information for the linker. + "-mno-relax"}; case InstructionSet::kX86: return {FindTool("clang"), "--compile", "-target", "i386-linux-gnu"}; case InstructionSet::kX86_64: @@ -159,6 +169,15 @@ class AssemblerTestBase : public testing::Test { "--no-print-imm-hex", "--triple", "thumbv7a-linux-gnueabi"}; + case InstructionSet::kRiscv64: + return {FindTool("llvm-objdump"), + "--disassemble", + "--no-print-imm-hex", + "--no-show-raw-insn", + // Disassemble Standard Extensions supported by the assembler. 
+ "--mattr=+F,+D,+A,+Zba,+Zbb", + "-M", + "no-aliases"}; default: return { FindTool("llvm-objdump"), "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn"}; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 672cd3d10f..53cb3d6f8e 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -79,7 +79,7 @@ class ArmVIXLAssemblerTest : public AssemblerTestBase { size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); DumpAndCheck(managed_code, testname, expected); } diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 8b47b38e63..dc7ec60032 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -25,6 +25,9 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "arm64/jni_macro_assembler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "riscv64/jni_macro_assembler_riscv64.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "x86/jni_macro_assembler_x86.h" #endif @@ -34,6 +37,7 @@ #include "base/casts.h" #include "base/globals.h" #include "base/memory_region.h" +#include "gc_root.h" namespace art HIDDEN { @@ -79,6 +83,10 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( case InstructionSet::kArm64: return MacroAsm64UniquePtr(new (allocator) arm64::Arm64JNIMacroAssembler(allocator)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return MacroAsm64UniquePtr(new (allocator) riscv64::Riscv64JNIMacroAssembler(allocator)); +#endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: return MacroAsm64UniquePtr(new (allocator) x86_64::X86_64JNIMacroAssembler(allocator)); @@ -90,4 +98,21 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( } } +template <PointerSize kPointerSize> +void JNIMacroAssembler<kPointerSize>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs) { + static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); + Load(dest, base, offs, sizeof(uint32_t)); +} + +template +void JNIMacroAssembler<PointerSize::k32>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); +template +void JNIMacroAssembler<PointerSize::k64>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); + } // namespace art diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h index 0c729705dc..2d51439ee8 100644 --- a/compiler/utils/jni_macro_assembler.h +++ b/compiler/utils/jni_macro_assembler.h @@ -92,7 +92,7 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual size_t CodeSize() const = 0; // Copy instructions out of assembly buffer into the given region of memory - virtual void FinalizeInstructions(const MemoryRegion& region) = 0; + virtual void CopyInstructions(const MemoryRegion& region) = 0; // Emit code that will create an activation on the stack virtual void BuildFrame(size_t frame_size, @@ -129,9 +129,14 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { // Load routines virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0; virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0; - virtual void LoadRawPtrFromThread(ManagedRegister dest, 
ThreadOffset<kPointerSize> offs) = 0; + // Load reference from a `GcRoot<>`. The default is to load as `jint`. Some architectures + // (say, RISC-V) override this to provide a different sign- or zero-extension. + virtual void LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); + // Copying routines // Move arguments from `srcs` locations to `dests` locations. @@ -266,8 +271,8 @@ class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> { return asm_.CodeSize(); } - void FinalizeInstructions(const MemoryRegion& region) override { - asm_.FinalizeInstructions(region); + void CopyInstructions(const MemoryRegion& region) override { + asm_.CopyInstructions(region); } DebugFrameOpCodeWriterForAssembler& cfi() override { diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index ac8e7d3010..ff182e6146 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -77,15 +77,14 @@ class JNIMacroAssemblerTest : public AssemblerTestBase { private: // Override this to pad the code with NOPs to a certain size if needed. - virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { - } + virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {} void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); + assembler_->CopyInstructions(code); Pad(*data); Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 0368d90a26..25bf01376b 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -31,6 +31,10 @@ class AssemblerFixup; namespace arm64 { class Arm64Assembler; } // namespace arm64 +namespace riscv64 { +class Riscv64Assembler; +class Riscv64Label; +} // namespace riscv64 namespace x86 { class X86Assembler; class NearLabel; @@ -109,6 +113,8 @@ class Label { } friend class arm64::Arm64Assembler; + friend class riscv64::Riscv64Assembler; + friend class riscv64::Riscv64Label; friend class x86::X86Assembler; friend class x86::NearLabel; friend class x86_64::X86_64Assembler; diff --git a/compiler/utils/riscv64/assembler_riscv64.cc b/compiler/utils/riscv64/assembler_riscv64.cc new file mode 100644 index 0000000000..089bc5dfe6 --- /dev/null +++ b/compiler/utils/riscv64/assembler_riscv64.cc @@ -0,0 +1,2422 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "assembler_riscv64.h" + +#include "base/bit_utils.h" +#include "base/casts.h" +#include "base/logging.h" +#include "base/memory_region.h" + +namespace art HIDDEN { +namespace riscv64 { + +static_assert(static_cast<size_t>(kRiscv64PointerSize) == kRiscv64DoublewordSize, + "Unexpected Riscv64 pointer size."); +static_assert(kRiscv64PointerSize == PointerSize::k64, "Unexpected Riscv64 pointer size."); + +// Split 32-bit offset into an `imm20` for LUI/AUIPC and +// a signed 12-bit short offset for ADDI/JALR/etc. +ALWAYS_INLINE static inline std::pair<uint32_t, int32_t> SplitOffset(int32_t offset) { + // The highest 0x800 values are out of range. + DCHECK_LT(offset, 0x7ffff800); + // Round `offset` to nearest 4KiB offset because short offset has range [-0x800, 0x800). + int32_t near_offset = (offset + 0x800) & ~0xfff; + // Calculate the short offset. + int32_t short_offset = offset - near_offset; + DCHECK(IsInt<12>(short_offset)); + // Extract the `imm20`. + uint32_t imm20 = static_cast<uint32_t>(near_offset) >> 12; + // Return the result as a pair. + return std::make_pair(imm20, short_offset); +} + +ALWAYS_INLINE static inline int32_t ToInt12(uint32_t uint12) { + DCHECK(IsUint<12>(uint12)); + return static_cast<int32_t>(uint12 - ((uint12 & 0x800) << 1)); +} + +void Riscv64Assembler::FinalizeCode() { + CHECK(!finalized_); + Assembler::FinalizeCode(); + ReserveJumpTableSpace(); + EmitLiterals(); + PromoteBranches(); + EmitBranches(); + EmitJumpTables(); + PatchCFI(); + finalized_ = true; +} + +void Riscv64Assembler::Emit(uint32_t value) { + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. 
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } +} + +/////////////////////////////// RV64 VARIANTS extension /////////////////////////////// + +//////////////////////////////// RV64 "I" Instructions //////////////////////////////// + +// LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37 + +void Riscv64Assembler::Lui(XRegister rd, uint32_t imm20) { + EmitU(imm20, rd, 0x37); +} + +void Riscv64Assembler::Auipc(XRegister rd, uint32_t imm20) { + EmitU(imm20, rd, 0x17); +} + +// Jump instructions (RV32I), opcode = 0x67, 0x6f + +void Riscv64Assembler::Jal(XRegister rd, int32_t offset) { + EmitJ(offset, rd, 0x6F); +} + +void Riscv64Assembler::Jalr(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x0, rd, 0x67); +} + +// Branch instructions, opcode = 0x63 (subfunc from 0x0 ~ 0x7), 0x67, 0x6f + +void Riscv64Assembler::Beq(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x0, 0x63); +} + +void Riscv64Assembler::Bne(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x1, 0x63); +} + +void Riscv64Assembler::Blt(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x4, 0x63); +} + +void Riscv64Assembler::Bge(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x5, 0x63); +} + +void Riscv64Assembler::Bltu(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x6, 0x63); +} + +void Riscv64Assembler::Bgeu(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x7, 0x63); +} + +// Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6 + +void Riscv64Assembler::Lb(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x0, rd, 0x03); +} + +void Riscv64Assembler::Lh(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x1, rd, 0x03); +} + +void Riscv64Assembler::Lw(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x2, rd, 0x03); +} + +void Riscv64Assembler::Ld(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x3, rd, 0x03); +} + +void Riscv64Assembler::Lbu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x4, rd, 0x03); +} + +void Riscv64Assembler::Lhu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x5, rd, 0x03); +} + +void Riscv64Assembler::Lwu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x6, rd, 0x3); +} + +// Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3 + +void Riscv64Assembler::Sb(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x0, 0x23); +} + +void Riscv64Assembler::Sh(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x1, 0x23); +} + +void Riscv64Assembler::Sw(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x2, 0x23); +} + +void Riscv64Assembler::Sd(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x3, 0x23); +} + +// IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Addi(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x0, rd, 0x13); +} + +void Riscv64Assembler::Slti(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x2, rd, 0x13); +} + +void Riscv64Assembler::Sltiu(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x3, rd, 0x13); +} + +void Riscv64Assembler::Xori(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x4, 
rd, 0x13); +} + +void Riscv64Assembler::Ori(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x6, rd, 0x13); +} + +void Riscv64Assembler::Andi(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x7, rd, 0x13); +} + +// 0x1 Split: 0x0(6b) + imm12(6b) +void Riscv64Assembler::Slli(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x0, shamt, rs1, 0x1, rd, 0x13); +} + +// 0x5 Split: 0x0(6b) + imm12(6b) +void Riscv64Assembler::Srli(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x0, shamt, rs1, 0x5, rd, 0x13); +} + +// 0x5 Split: 0x10(6b) + imm12(6b) +void Riscv64Assembler::Srai(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x10, shamt, rs1, 0x5, rd, 0x13); +} + +// ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Sub(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Slt(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x02, rd, 0x33); +} + +void Riscv64Assembler::Sltu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x03, rd, 0x33); +} + +void Riscv64Assembler::Xor(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x04, rd, 0x33); +} + +void Riscv64Assembler::Or(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x06, rd, 0x33); +} + +void Riscv64Assembler::And(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x07, rd, 0x33); +} + +void Riscv64Assembler::Sll(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x01, rd, 0x33); +} + +void Riscv64Assembler::Srl(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x05, rd, 0x33); +} + +void Riscv64Assembler::Sra(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x05, rd, 0x33); +} + +// 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5 + +void Riscv64Assembler::Addiw(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x0, rd, 0x1b); +} + +void Riscv64Assembler::Slliw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x0, shamt, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Srliw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x0, shamt, rs1, 0x5, rd, 0x1b); +} + +void Riscv64Assembler::Sraiw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x20, shamt, rs1, 0x5, rd, 0x1b); +} + +// 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Addw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Subw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Sllw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x1, rd, 0x3b); +} + +void Riscv64Assembler::Srlw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Sraw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x5, rd, 0x3b); +} + +// Environment call and breakpoint (RV32I), opcode = 
0x73 + +void Riscv64Assembler::Ecall() { EmitI(0x0, 0x0, 0x0, 0x0, 0x73); } + +void Riscv64Assembler::Ebreak() { EmitI(0x1, 0x0, 0x0, 0x0, 0x73); } + +// Fence instruction (RV32I): opcode = 0xf, funct3 = 0 + +void Riscv64Assembler::Fence(uint32_t pred, uint32_t succ) { + DCHECK(IsUint<4>(pred)); + DCHECK(IsUint<4>(succ)); + EmitI(/* normal fence */ 0x0 << 8 | pred << 4 | succ, 0x0, 0x0, 0x0, 0xf); +} + +void Riscv64Assembler::FenceTso() { + static constexpr uint32_t kPred = kFenceWrite | kFenceRead; + static constexpr uint32_t kSucc = kFenceWrite | kFenceRead; + EmitI(ToInt12(/* TSO fence */ 0x8 << 8 | kPred << 4 | kSucc), 0x0, 0x0, 0x0, 0xf); +} + +//////////////////////////////// RV64 "I" Instructions END //////////////////////////////// + +/////////////////////////// RV64 "Zifencei" Instructions START //////////////////////////// + +// "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1 +void Riscv64Assembler::FenceI() { EmitI(0x0, 0x0, 0x1, 0x0, 0xf); } + +//////////////////////////// RV64 "Zifencei" Instructions END ///////////////////////////// + +/////////////////////////////// RV64 "M" Instructions START /////////////////////////////// + +// RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Mul(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Mulh(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x1, rd, 0x33); +} + +void Riscv64Assembler::Mulhsu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x2, rd, 0x33); +} + +void Riscv64Assembler::Mulhu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x3, rd, 0x33); +} + +void Riscv64Assembler::Div(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Divu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Rem(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Remu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x7, rd, 0x33); +} + +// RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7 + +void Riscv64Assembler::Mulw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Divw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x4, rd, 0x3b); +} + +void Riscv64Assembler::Divuw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Remw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x6, rd, 0x3b); +} + +void Riscv64Assembler::Remuw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x7, rd, 0x3b); +} + +//////////////////////////////// RV64 "M" Instructions END //////////////////////////////// + +/////////////////////////////// RV64 "A" Instructions START /////////////////////////////// + +void Riscv64Assembler::LrW(XRegister rd, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kRelease); + EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::LrD(XRegister rd, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kRelease); + EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kAcquire); + EmitR4(0x3, 
enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kAcquire); + EmitR4(0x3, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +/////////////////////////////// RV64 "A" Instructions END /////////////////////////////// + +///////////////////////////// RV64 "Zicsr" Instructions START ///////////////////////////// + +// "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7 + +void Riscv64Assembler::Csrrw(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x1, rd, 0x73); +} + 
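For orientation, the `EmitI()` calls above pack their operands into the standard RISC-V I-type instruction word; the helper itself lives in the accompanying header rather than in this hunk, so the following is only a minimal standalone sketch of that layout (field widths from the RISC-V ISA spec, names illustrative, not part of the patch):

#include <cstdint>

// Sketch of the I-type layout assumed by `EmitI(imm12, rs1, funct3, rd, opcode)` calls above:
// imm[11:0] << 20 | rs1 << 15 | funct3 << 12 | rd << 7 | opcode
static uint32_t EncodeITypeSketch(int32_t imm12, uint32_t rs1, uint32_t funct3,
                                  uint32_t rd, uint32_t opcode) {
  return (static_cast<uint32_t>(imm12) & 0xfffu) << 20 |
         (rs1 & 0x1fu) << 15 |
         (funct3 & 0x7u) << 12 |
         (rd & 0x1fu) << 7 |
         (opcode & 0x7fu);
}
// E.g. `Csrrw(rd, csr, rs1)` above is `EmitI(ToInt12(csr), rs1, 0x1, rd, 0x73)`:
// the CSR number travels in the 12-bit immediate field of the I-type word.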
+void Riscv64Assembler::Csrrs(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x2, rd, 0x73); +} + +void Riscv64Assembler::Csrrc(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x3, rd, 0x73); +} + +void Riscv64Assembler::Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x5, rd, 0x73); +} + +void Riscv64Assembler::Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x6, rd, 0x73); +} + +void Riscv64Assembler::Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x7, rd, 0x73); +} + +////////////////////////////// RV64 "Zicsr" Instructions END ////////////////////////////// + +/////////////////////////////// RV64 "FD" Instructions START /////////////////////////////// + +// FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27 + +void Riscv64Assembler::FLw(FRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x2, rd, 0x07); +} + +void Riscv64Assembler::FLd(FRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x3, rd, 0x07); +} + +void Riscv64Assembler::FSw(FRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x2, 0x27); +} + +void Riscv64Assembler::FSd(FRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x3, 0x27); +} + +// FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f + +void Riscv64Assembler::FMAddS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43); +} + +void Riscv64Assembler::FMAddD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43); +} + +void Riscv64Assembler::FMSubS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47); +} + +void Riscv64Assembler::FMSubD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47); +} + +void Riscv64Assembler::FNMSubS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b); +} + +void Riscv64Assembler::FNMSubD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b); +} + +void Riscv64Assembler::FNMAddS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f); +} + +void Riscv64Assembler::FNMAddD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f); +} + +// Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D + +void Riscv64Assembler::FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x4, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSubD(FRegister rd, FRegister rs1, FRegister 
rs2, FPRoundingMode frm) { + EmitR(0x5, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x8, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x9, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0xc, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0xd, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x2c, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x2d, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSgnjS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FSgnjD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FMinS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x14, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMinD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x15, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMaxS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x14, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FMaxD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x15, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x20, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. 
+ EmitR(0x21, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +// FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D + +void Riscv64Assembler::FEqS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FEqD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FLtS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FLtD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FLeS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FLeD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x0, rd, 0x53); +} + +// FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D + +void Riscv64Assembler::FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. + EmitR(0x69, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. 
+ EmitR(0x69, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x69, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x69, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +// FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D + +void Riscv64Assembler::FMvXW(XRegister rd, FRegister rs1) { + EmitR(0x70, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvXD(XRegister rd, FRegister rs1) { + EmitR(0x71, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvWX(FRegister rd, XRegister rs1) { + EmitR(0x78, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvDX(FRegister rd, XRegister rs1) { + EmitR(0x79, 0x0, rs1, 0x0, rd, 0x53); +} + +// FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D + +void Riscv64Assembler::FClassS(XRegister rd, FRegister rs1) { + EmitR(0x70, 0x0, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FClassD(XRegister rd, FRegister rs1) { + EmitR(0x71, 0x0, rs1, 0x1, rd, 0x53); +} + +/////////////////////////////// RV64 "FD" Instructions END /////////////////////////////// + +////////////////////////////// RV64 "Zba" Instructions START ///////////////////////////// + +void Riscv64Assembler::AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x4, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Sh1Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x33); +} + +void Riscv64Assembler::Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x3b); +} + +void Riscv64Assembler::Sh2Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x4, rd, 0x3b); +} + +void Riscv64Assembler::Sh3Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x6, rd, 0x3b); +} + +void Riscv64Assembler::SlliUw(XRegister rd, XRegister rs1, int32_t shamt) { + EmitI6(0x2, shamt, rs1, 0x1, rd, 0x1b); +} + +/////////////////////////////// RV64 "Zba" Instructions END ////////////////////////////// + +////////////////////////////// RV64 "Zbb" Instructions START ///////////////////////////// + +void Riscv64Assembler::Andn(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x7, rd, 0x33); +} + +void Riscv64Assembler::Orn(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Xnor(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Clz(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x0, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Clzw(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x0, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Ctz(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x1, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Ctzw(XRegister 
rd, XRegister rs1) { + EmitR(0x30, 0x1, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Cpop(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x2, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Cpopw(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x2, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Min(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Minu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Max(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Maxu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x7, rd, 0x33); +} + +void Riscv64Assembler::Rol(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x1, rd, 0x33); +} + +void Riscv64Assembler::Rolw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x1, rd, 0x3b); +} + +void Riscv64Assembler::Ror(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Rorw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Rori(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x18, shamt, rs1, 0x5, rd, 0x13); +} + +void Riscv64Assembler::Roriw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitI6(0x18, shamt, rs1, 0x5, rd, 0x1b); +} + +void Riscv64Assembler::OrcB(XRegister rd, XRegister rs1) { + EmitR(0x14, 0x7, rs1, 0x5, rd, 0x13); +} + +void Riscv64Assembler::Rev8(XRegister rd, XRegister rs1) { + EmitR(0x35, 0x18, rs1, 0x5, rd, 0x13); +} + +/////////////////////////////// RV64 "Zbb" Instructions END ////////////////////////////// + +////////////////////////////// RV64 MACRO Instructions START /////////////////////////////// + +// Pseudo instructions + +void Riscv64Assembler::Nop() { Addi(Zero, Zero, 0); } + +void Riscv64Assembler::Li(XRegister rd, int64_t imm) { + LoadImmediate(rd, imm, /*can_use_tmp=*/ false); +} + +void Riscv64Assembler::Mv(XRegister rd, XRegister rs) { Addi(rd, rs, 0); } + +void Riscv64Assembler::Not(XRegister rd, XRegister rs) { Xori(rd, rs, -1); } + +void Riscv64Assembler::Neg(XRegister rd, XRegister rs) { Sub(rd, Zero, rs); } + +void Riscv64Assembler::NegW(XRegister rd, XRegister rs) { Subw(rd, Zero, rs); } + +void Riscv64Assembler::SextB(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 8u); + Srai(rd, rd, kXlen - 8u); +} + +void Riscv64Assembler::SextH(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 16u); + Srai(rd, rd, kXlen - 16u); +} + +void Riscv64Assembler::SextW(XRegister rd, XRegister rs) { Addiw(rd, rs, 0); } + +void Riscv64Assembler::ZextB(XRegister rd, XRegister rs) { Andi(rd, rs, 0xff); } + +void Riscv64Assembler::ZextH(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 16u); + Srli(rd, rd, kXlen - 16u); +} + +void Riscv64Assembler::ZextW(XRegister rd, XRegister rs) { + // TODO(riscv64): Use the ZEXT.W alias for ADD.UW from the Zba extension. 
+ Slli(rd, rs, kXlen - 32u); + Srli(rd, rd, kXlen - 32u); +} + +void Riscv64Assembler::Seqz(XRegister rd, XRegister rs) { Sltiu(rd, rs, 1); } + +void Riscv64Assembler::Snez(XRegister rd, XRegister rs) { Sltu(rd, Zero, rs); } + +void Riscv64Assembler::Sltz(XRegister rd, XRegister rs) { Slt(rd, rs, Zero); } + +void Riscv64Assembler::Sgtz(XRegister rd, XRegister rs) { Slt(rd, Zero, rs); } + +void Riscv64Assembler::FMvS(FRegister rd, FRegister rs) { FSgnjS(rd, rs, rs); } + +void Riscv64Assembler::FAbsS(FRegister rd, FRegister rs) { FSgnjxS(rd, rs, rs); } + +void Riscv64Assembler::FNegS(FRegister rd, FRegister rs) { FSgnjnS(rd, rs, rs); } + +void Riscv64Assembler::FMvD(FRegister rd, FRegister rs) { FSgnjD(rd, rs, rs); } + +void Riscv64Assembler::FAbsD(FRegister rd, FRegister rs) { FSgnjxD(rd, rs, rs); } + +void Riscv64Assembler::FNegD(FRegister rd, FRegister rs) { FSgnjnD(rd, rs, rs); } + +void Riscv64Assembler::Beqz(XRegister rs, int32_t offset) { + Beq(rs, Zero, offset); +} + +void Riscv64Assembler::Bnez(XRegister rs, int32_t offset) { + Bne(rs, Zero, offset); +} + +void Riscv64Assembler::Blez(XRegister rt, int32_t offset) { + Bge(Zero, rt, offset); +} + +void Riscv64Assembler::Bgez(XRegister rt, int32_t offset) { + Bge(rt, Zero, offset); +} + +void Riscv64Assembler::Bltz(XRegister rt, int32_t offset) { + Blt(rt, Zero, offset); +} + +void Riscv64Assembler::Bgtz(XRegister rt, int32_t offset) { + Blt(Zero, rt, offset); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, int32_t offset) { + Blt(rt, rs, offset); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, int32_t offset) { + Bge(rt, rs, offset); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, int32_t offset) { + Bltu(rt, rs, offset); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, int32_t offset) { + Bgeu(rt, rs, offset); +} + +void Riscv64Assembler::J(int32_t offset) { Jal(Zero, offset); } + +void Riscv64Assembler::Jal(int32_t offset) { Jal(RA, offset); } + +void Riscv64Assembler::Jr(XRegister rs) { Jalr(Zero, rs, 0); } + +void Riscv64Assembler::Jalr(XRegister rs) { Jalr(RA, rs, 0); } + +void Riscv64Assembler::Jalr(XRegister rd, XRegister rs) { Jalr(rd, rs, 0); } + +void Riscv64Assembler::Ret() { Jalr(Zero, RA, 0); } + +void Riscv64Assembler::RdCycle(XRegister rd) { + Csrrs(rd, 0xc00, Zero); +} + +void Riscv64Assembler::RdTime(XRegister rd) { + Csrrs(rd, 0xc01, Zero); +} + +void Riscv64Assembler::RdInstret(XRegister rd) { + Csrrs(rd, 0xc02, Zero); +} + +void Riscv64Assembler::Csrr(XRegister rd, uint32_t csr) { + Csrrs(rd, csr, Zero); +} + +void Riscv64Assembler::Csrw(uint32_t csr, XRegister rs) { + Csrrw(Zero, csr, rs); +} + +void Riscv64Assembler::Csrs(uint32_t csr, XRegister rs) { + Csrrs(Zero, csr, rs); +} + +void Riscv64Assembler::Csrc(uint32_t csr, XRegister rs) { + Csrrc(Zero, csr, rs); +} + +void Riscv64Assembler::Csrwi(uint32_t csr, uint32_t uimm5) { + Csrrwi(Zero, csr, uimm5); +} + +void Riscv64Assembler::Csrsi(uint32_t csr, uint32_t uimm5) { + Csrrsi(Zero, csr, uimm5); +} + +void Riscv64Assembler::Csrci(uint32_t csr, uint32_t uimm5) { + Csrrci(Zero, csr, uimm5); +} + +void Riscv64Assembler::Loadb(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lb>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadh(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lh>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadw(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lw>(rd, rs1, offset); +} + 
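The `Loadb`/`Loadw`/`Storeb`/... macros here delegate to `LoadFromOffset<>`/`StoreToOffset<>` templates that are not visible in this hunk. Assuming they build on the `SplitOffset()` helper defined near the top of this file, a load whose offset does not fit in the 12-bit immediate can be expected to expand roughly as in this sketch (written as if placed in the same file so that `SplitOffset()` and the scratch register `TMP` are in scope; the real template may differ):

// Illustrative only: expected expansion of a word load with a large offset.
static void LoadwLargeOffsetSketch(Riscv64Assembler* assembler,
                                   XRegister rd, XRegister rs1, int32_t offset) {
  if (IsInt<12>(offset)) {
    assembler->Lw(rd, rs1, offset);        // Short offset: a single LW.
  } else {
    auto [imm20, short_offset] = SplitOffset(offset);
    assembler->Lui(TMP, imm20);            // Materialize the offset rounded to 4 KiB.
    assembler->Add(TMP, TMP, rs1);         // TMP = rs1 + rounded offset.
    assembler->Lw(rd, TMP, short_offset);  // Remainder is in [-0x800, 0x800).
  }
}
// Worked example: offset 0x1fff splits into imm20 = 0x2 and short_offset = -1,
// so the sequence loads from rs1 + 0x2000 - 1 = rs1 + 0x1fff.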
+void Riscv64Assembler::Loadd(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Ld>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadbu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lbu>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadhu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lhu>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadwu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lwu>(rd, rs1, offset); +} + +void Riscv64Assembler::Storeb(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sb>(rs2, rs1, offset); +} + +void Riscv64Assembler::Storeh(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sh>(rs2, rs1, offset); +} + +void Riscv64Assembler::Storew(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sw>(rs2, rs1, offset); +} + +void Riscv64Assembler::Stored(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sd>(rs2, rs1, offset); +} + +void Riscv64Assembler::FLoadw(FRegister rd, XRegister rs1, int32_t offset) { + FLoadFromOffset<&Riscv64Assembler::FLw>(rd, rs1, offset); +} + +void Riscv64Assembler::FLoadd(FRegister rd, XRegister rs1, int32_t offset) { + FLoadFromOffset<&Riscv64Assembler::FLd>(rd, rs1, offset); +} + +void Riscv64Assembler::FStorew(FRegister rs2, XRegister rs1, int32_t offset) { + FStoreToOffset<&Riscv64Assembler::FSw>(rs2, rs1, offset); +} + +void Riscv64Assembler::FStored(FRegister rs2, XRegister rs1, int32_t offset) { + FStoreToOffset<&Riscv64Assembler::FSd>(rs2, rs1, offset); +} + +void Riscv64Assembler::LoadConst32(XRegister rd, int32_t value) { + // No need to use a temporary register for 32-bit values. + LoadImmediate(rd, value, /*can_use_tmp=*/ false); +} + +void Riscv64Assembler::LoadConst64(XRegister rd, int64_t value) { + LoadImmediate(rd, value, /*can_use_tmp=*/ true); +} + +template <typename ValueType, typename Addi, typename AddLarge> +void AddConstImpl(Riscv64Assembler* assembler, + XRegister rd, + XRegister rs1, + ValueType value, + Addi&& addi, + AddLarge&& add_large) { + ScratchRegisterScope srs(assembler); + // A temporary must be available for adjustment even if it's not needed. + // However, `rd` can be used as the temporary unless it's the same as `rs1` or SP. 
+ DCHECK_IMPLIES(rd == rs1 || rd == SP, srs.AvailableXRegisters() != 0u); + + if (IsInt<12>(value)) { + addi(rd, rs1, value); + return; + } + + constexpr int32_t kPositiveValueSimpleAdjustment = 0x7ff; + constexpr int32_t kHighestValueForSimpleAdjustment = 2 * kPositiveValueSimpleAdjustment; + constexpr int32_t kNegativeValueSimpleAdjustment = -0x800; + constexpr int32_t kLowestValueForSimpleAdjustment = 2 * kNegativeValueSimpleAdjustment; + + if (rd != rs1 && rd != SP) { + srs.IncludeXRegister(rd); + } + XRegister tmp = srs.AllocateXRegister(); + if (value >= 0 && value <= kHighestValueForSimpleAdjustment) { + addi(tmp, rs1, kPositiveValueSimpleAdjustment); + addi(rd, tmp, value - kPositiveValueSimpleAdjustment); + } else if (value < 0 && value >= kLowestValueForSimpleAdjustment) { + addi(tmp, rs1, kNegativeValueSimpleAdjustment); + addi(rd, tmp, value - kNegativeValueSimpleAdjustment); + } else { + add_large(rd, rs1, value, tmp); + } +} + +void Riscv64Assembler::AddConst32(XRegister rd, XRegister rs1, int32_t value) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + auto addiw = [&](XRegister rd, XRegister rs1, int32_t value) { Addiw(rd, rs1, value); }; + auto add_large = [&](XRegister rd, XRegister rs1, int32_t value, XRegister tmp) { + LoadConst32(tmp, value); + Addw(rd, rs1, tmp); + }; + AddConstImpl(this, rd, rs1, value, addiw, add_large); +} + +void Riscv64Assembler::AddConst64(XRegister rd, XRegister rs1, int64_t value) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + auto addi = [&](XRegister rd, XRegister rs1, int32_t value) { Addi(rd, rs1, value); }; + auto add_large = [&](XRegister rd, XRegister rs1, int64_t value, XRegister tmp) { + // We may not have another scratch register for `LoadConst64()`, so use `Li()`. + // TODO(riscv64): Refactor `LoadImmediate()` so that we can reuse the code to detect + // when the code path using the scratch reg is beneficial, and use that path with a + // small modification - instead of adding the two parts together, add them individually + // to the input `rs1`. (This works as long as `rd` is not the same as `tmp`.)

+ Li(tmp, value); + Add(rd, rs1, tmp); + }; + AddConstImpl(this, rd, rs1, value, addi, add_large); +} + +void Riscv64Assembler::Beqz(XRegister rs, Riscv64Label* label, bool is_bare) { + Beq(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bnez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bne(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Blez(XRegister rs, Riscv64Label* label, bool is_bare) { + Ble(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bge(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bltz(XRegister rs, Riscv64Label* label, bool is_bare) { + Blt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgtz(XRegister rs, Riscv64Label* label, bool is_bare) { + Bgt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondEQ, rs, rt); +} + +void Riscv64Assembler::Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondNE, rs, rt); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLE, rs, rt); +} + +void Riscv64Assembler::Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGE, rs, rt); +} + +void Riscv64Assembler::Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLT, rs, rt); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGT, rs, rt); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLEU, rs, rt); +} + +void Riscv64Assembler::Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGEU, rs, rt); +} + +void Riscv64Assembler::Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLTU, rs, rt); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGTU, rs, rt); +} + +void Riscv64Assembler::Jal(XRegister rd, Riscv64Label* label, bool is_bare) { + Buncond(label, rd, is_bare); +} + +void Riscv64Assembler::J(Riscv64Label* label, bool is_bare) { + Jal(Zero, label, is_bare); +} + +void Riscv64Assembler::Jal(Riscv64Label* label, bool is_bare) { + Jal(RA, label, is_bare); +} + +void Riscv64Assembler::Loadw(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteral); +} + +void Riscv64Assembler::Loadwu(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralUnsigned); +} + +void Riscv64Assembler::Loadd(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralLong); +} + +void Riscv64Assembler::FLoadw(FRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralFloat); +} + +void Riscv64Assembler::FLoadd(FRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralDouble); +} + +void Riscv64Assembler::Unimp() { + // TODO(riscv64): use 16-bit zero C.UNIMP once we support compression + Emit(0xC0001073); +} + +/////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// + +const 
Riscv64Assembler::Branch::BranchInfo Riscv64Assembler::Branch::branch_info_[] = { + // Short branches (can be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kCall + // Short branches (can't be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kBareCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareCall + + // Medium branch. + {8, 4, Riscv64Assembler::Branch::kOffset21}, // kCondBranch21 + + // Long branches. + {12, 4, Riscv64Assembler::Branch::kOffset32}, // kLongCondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongUncondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongCall + + // label. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLabel + + // literals. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteral + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralUnsigned + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralLong + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralFloat + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralDouble +}; + +void Riscv64Assembler::Branch::InitShortOrLong(Riscv64Assembler::Branch::OffsetBits offset_size, + Riscv64Assembler::Branch::Type short_type, + Riscv64Assembler::Branch::Type long_type, + Riscv64Assembler::Branch::Type longest_type) { + Riscv64Assembler::Branch::Type type = short_type; + if (offset_size > branch_info_[type].offset_size) { + type = long_type; + if (offset_size > branch_info_[type].offset_size) { + type = longest_type; + } + } + type_ = type; +} + +void Riscv64Assembler::Branch::InitializeType(Type initial_type) { + OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_); + + switch (initial_type) { + case kCondBranch: + if (condition_ != kUncond) { + InitShortOrLong(offset_size_needed, kCondBranch, kCondBranch21, kLongCondBranch); + break; + } + FALLTHROUGH_INTENDED; + case kUncondBranch: + InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch, kLongUncondBranch); + break; + case kCall: + InitShortOrLong(offset_size_needed, kCall, kLongCall, kLongCall); + break; + case kBareCondBranch: + if (condition_ != kUncond) { + type_ = kBareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + } + FALLTHROUGH_INTENDED; + case kBareUncondBranch: + type_ = kBareUncondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCall: + type_ = kBareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kLabel: + type_ = initial_type; + break; + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + case kLiteralFloat: + case kLiteralDouble: + CHECK(!IsResolved()); + type_ = initial_type; + break; + default: + LOG(FATAL) << "Unexpected branch type " << enum_cast<uint32_t>(initial_type); + UNREACHABLE(); + } + + old_type_ = type_; +} + +bool Riscv64Assembler::Branch::IsNop(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kCondNE: + case kCondLT: + case kCondGT: + case kCondLTU: + case kCondGTU: + return lhs == rhs; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondEQ: + case kCondGE: + case kCondLE: + case kCondLEU: + case kCondGEU: + return lhs == rhs; + 
default: + return false; + } +} + +Riscv64Assembler::Branch::Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(rd), + rhs_reg_(Zero), + freg_(kNoFRegister), + condition_(kUncond) { + InitializeType( + (rd != Zero ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareUncondBranch : kUncondBranch))); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Riscv64Assembler::BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + freg_(kNoFRegister), + condition_(condition) { + DCHECK_NE(condition, kUncond); + DCHECK(!IsNop(condition, lhs_reg, rhs_reg)); + DCHECK(!IsUncond(condition, lhs_reg, rhs_reg)); + InitializeType(is_bare ? kBareCondBranch : kCondBranch); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + XRegister rd, + Type label_or_literal_type) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(rd), + rhs_reg_(Zero), + freg_(kNoFRegister), + condition_(kUncond) { + CHECK_NE(rd , Zero); + InitializeType(label_or_literal_type); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + FRegister rd, + Type literal_type) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(Zero), + rhs_reg_(Zero), + freg_(rd), + condition_(kUncond) { + InitializeType(literal_type); +} + +Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::OppositeCondition( + Riscv64Assembler::BranchCondition cond) { + switch (cond) { + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kCondLEU: + return kCondGTU; + case kCondGTU: + return kCondLEU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +Riscv64Assembler::Branch::Type Riscv64Assembler::Branch::GetType() const { return type_; } + +Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::GetCondition() const { + return condition_; +} + +XRegister Riscv64Assembler::Branch::GetLeftRegister() const { return lhs_reg_; } + +XRegister Riscv64Assembler::Branch::GetRightRegister() const { return rhs_reg_; } + +FRegister Riscv64Assembler::Branch::GetFRegister() const { return freg_; } + +uint32_t Riscv64Assembler::Branch::GetTarget() const { return target_; } + +uint32_t Riscv64Assembler::Branch::GetLocation() const { return location_; } + +uint32_t Riscv64Assembler::Branch::GetOldLocation() const { return old_location_; } + +uint32_t Riscv64Assembler::Branch::GetLength() const { return branch_info_[type_].length; } + +uint32_t Riscv64Assembler::Branch::GetOldLength() const { return branch_info_[old_type_].length; } + +uint32_t Riscv64Assembler::Branch::GetEndLocation() const { return GetLocation() + GetLength(); } + +uint32_t Riscv64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldLength(); +} + +bool Riscv64Assembler::Branch::IsBare() const { + switch (type_) { + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: + return true; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsResolved() const { return target_ != kUnresolved; } + 
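The three offset tiers in the `branch_info_` table above (13, 21, and 32 bits) correspond to a plain conditional branch, a JAL, and an AUIPC+JALR pair, respectively. As a rough standalone illustration of how a resolved PC-relative distance maps onto those tiers (a simplified restatement for readers; the real classification is `GetOffsetSizeNeeded()` below, and the names here are illustrative only):

#include <cstdint>
#include <cstdio>

enum class OffsetTier { kOffset13, kOffset21, kOffset32 };

// Signed reach of 13-, 21- and 32-bit PC-relative offsets.
static OffsetTier ClassifyDistanceSketch(int64_t distance) {
  if (distance >= -(INT64_C(1) << 12) && distance < (INT64_C(1) << 12)) {
    return OffsetTier::kOffset13;  // BEQ/BNE/... reach +/- 4 KiB.
  }
  if (distance >= -(INT64_C(1) << 20) && distance < (INT64_C(1) << 20)) {
    return OffsetTier::kOffset21;  // JAL reaches +/- 1 MiB.
  }
  return OffsetTier::kOffset32;    // AUIPC + JALR covers the rest.
}

int main() {
  // +100 bytes -> short conditional branch; +64 KiB -> medium form (opposite-condition
  // branch over a JAL); +4 MiB -> long form (opposite-condition branch over AUIPC+JALR).
  std::printf("%d %d %d\n",
              static_cast<int>(ClassifyDistanceSketch(100)),
              static_cast<int>(ClassifyDistanceSketch(64 * 1024)),
              static_cast<int>(ClassifyDistanceSketch(4 * 1024 * 1024)));
  return 0;
}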
+Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSize() const { + return branch_info_[type_].offset_size; +} + +Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSizeNeeded( + uint32_t location, uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) { + return kOffset13; + } + int64_t distance = static_cast<int64_t>(target) - location; + if (IsInt<kOffset13>(distance)) { + return kOffset13; + } else if (IsInt<kOffset21>(distance)) { + return kOffset21; + } else { + return kOffset32; + } +} + +void Riscv64Assembler::Branch::Resolve(uint32_t target) { target_ = target; } + +void Riscv64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + if (location_ > expand_location) { + location_ += delta; + } + if (target_ > expand_location) { + target_ += delta; + } +} + +uint32_t Riscv64Assembler::Branch::PromoteIfNeeded() { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + Type old_type = type_; + switch (type_) { + // Short branches (can be promoted to longer). + case kCondBranch: { + OffsetBits needed_size = GetOffsetSizeNeeded(GetOffsetLocation(), target_); + if (needed_size <= GetOffsetSize()) { + return 0u; + } + // The offset remains the same for `kCondBranch21` for forward branches. + DCHECK_EQ(branch_info_[kCondBranch21].length - branch_info_[kCondBranch21].pc_offset, + branch_info_[kCondBranch].length - branch_info_[kCondBranch].pc_offset); + if (target_ <= location_) { + // Calculate the needed size for kCondBranch21. + needed_size = + GetOffsetSizeNeeded(location_ + branch_info_[kCondBranch21].pc_offset, target_); + } + type_ = (needed_size <= branch_info_[kCondBranch21].offset_size) + ? kCondBranch21 + : kLongCondBranch; + break; + } + case kUncondBranch: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongUncondBranch; + break; + case kCall: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCall; + break; + // Medium branch (can be promoted to long). + case kCondBranch21: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCondBranch; + break; + default: + // Other branch types cannot be promoted. + DCHECK_LE(GetOffsetSizeNeeded(GetOffsetLocation(), target_), GetOffsetSize()) << type_; + return 0u; + } + DCHECK(type_ != old_type); + DCHECK_GT(branch_info_[type_].length, branch_info_[old_type].length); + return branch_info_[type_].length - branch_info_[old_type].length; +} + +uint32_t Riscv64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].pc_offset; +} + +int32_t Riscv64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. 
+ uint32_t offset_location = GetOffsetLocation(); + int32_t offset = static_cast<int32_t>(target_ - offset_location); + DCHECK_EQ(offset, static_cast<int64_t>(target_) - static_cast<int64_t>(offset_location)); + return offset; +} + +void Riscv64Assembler::EmitBcond(BranchCondition cond, + XRegister rs, + XRegister rt, + int32_t offset) { + switch (cond) { +#define DEFINE_CASE(COND, cond) \ + case kCond##COND: \ + B##cond(rs, rt, offset); \ + break; + DEFINE_CASE(EQ, eq) + DEFINE_CASE(NE, ne) + DEFINE_CASE(LT, lt) + DEFINE_CASE(GE, ge) + DEFINE_CASE(LE, le) + DEFINE_CASE(GT, gt) + DEFINE_CASE(LTU, ltu) + DEFINE_CASE(GEU, geu) + DEFINE_CASE(LEU, leu) + DEFINE_CASE(GTU, gtu) +#undef DEFINE_CASE + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +void Riscv64Assembler::EmitBranch(Riscv64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + const int32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + XRegister lhs = branch->GetLeftRegister(); + XRegister rhs = branch->GetRightRegister(); + + auto emit_auipc_and_next = [&](XRegister reg, auto next) { + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + auto [imm20, short_offset] = SplitOffset(offset); + Auipc(reg, imm20); + next(short_offset); + }; + + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + case Branch::kBareUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + case Branch::kCondBranch: + case Branch::kBareCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcond(condition, lhs, rhs, offset); + break; + case Branch::kCall: + case Branch::kBareCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + DCHECK(lhs != Zero); + Jal(lhs, offset); + break; + + // Medium branch. + case Branch::kCondBranch21: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + + // Long branches. + case Branch::kLongCondBranch: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + FALLTHROUGH_INTENDED; + case Branch::kLongUncondBranch: + emit_auipc_and_next(TMP, [&](int32_t short_offset) { Jalr(Zero, TMP, short_offset); }); + break; + case Branch::kLongCall: + DCHECK(lhs != Zero); + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Jalr(lhs, lhs, short_offset); }); + break; + + // label. + case Branch::kLabel: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Addi(lhs, lhs, short_offset); }); + break; + // literals. 
+ case Branch::kLiteral: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lw(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralUnsigned: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lwu(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralLong: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Ld(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralFloat: + emit_auipc_and_next( + TMP, [&](int32_t short_offset) { FLw(branch->GetFRegister(), TMP, short_offset); }); + break; + case Branch::kLiteralDouble: + emit_auipc_and_next( + TMP, [&](int32_t short_offset) { FLd(branch->GetFRegister(), TMP, short_offset); }); + break; + } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LE(branch->GetLength(), static_cast<uint32_t>(Branch::kMaxBranchLength)); +} + +void Riscv64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + +void Riscv64Assembler::FinalizeLabeledBranch(Riscv64Label* label) { + // TODO(riscv64): Support "C" Standard Extension - length may not be a multiple of 4. + DCHECK_ALIGNED(branches_.back().GetLength(), sizeof(uint32_t)); + uint32_t length = branches_.back().GetLength() / sizeof(uint32_t); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + for (; length != 0u; --length) { + Nop(); + } +} + +void Riscv64Assembler::Bcond( + Riscv64Label* label, bool is_bare, BranchCondition condition, XRegister lhs, XRegister rhs) { + // TODO(riscv64): Should an assembler perform these optimizations, or should we remove them? + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + if (Branch::IsUncond(condition, lhs, rhs)) { + Buncond(label, Zero, is_bare); + return; + } + + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs, is_bare); + FinalizeLabeledBranch(label); +} + +void Riscv64Assembler::Buncond(Riscv64Label* label, XRegister rd, bool is_bare) { + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, is_bare); + FinalizeLabeledBranch(label); +} + +template <typename XRegisterOrFRegister> +void Riscv64Assembler::LoadLiteral(Literal* literal, + XRegisterOrFRegister rd, + Branch::Type literal_type) { + Riscv64Label* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), Branch::kUnresolved, rd, literal_type); + FinalizeLabeledBranch(label); +} + +Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Riscv64Assembler::Bind(Riscv64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); + + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); + + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); + + // On to the previous branch in the list... + label->position_ = prev; + } + + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + uint32_t prev_branch_id = Riscv64Label::kNoPrevBranchId; + if (!branches_.empty()) { + prev_branch_id = branches_.size() - 1u; + const Branch* prev_branch = GetBranch(prev_branch_id); + bound_pc -= prev_branch->GetEndLocation(); + } + label->prev_branch_id_ = prev_branch_id; + label->BindTo(bound_pc); +} + +void Riscv64Assembler::LoadLabelAddress(XRegister rd, Riscv64Label* label) { + DCHECK_NE(rd, Zero); + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, Branch::kLabel); + FinalizeLabeledBranch(label); +} + +Literal* Riscv64Assembler::NewLiteral(size_t size, const uint8_t* data) { + // We don't support byte and half-word literals. + if (size == 4u) { + literals_.emplace_back(size, data); + return &literals_.back(); + } else { + DCHECK_EQ(size, 8u); + long_literals_.emplace_back(size, data); + return &long_literals_.back(); + } +} + +JumpTable* Riscv64Assembler::CreateJumpTable(ArenaVector<Riscv64Label*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +uint32_t Riscv64Assembler::GetLabelLocation(const Riscv64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_ != Riscv64Label::kNoPrevBranchId) { + // Get label location based on the branch preceding it. + const Branch* prev_branch = GetBranch(label->prev_branch_id_); + target += prev_branch->GetEndLocation(); + } + return target; +} + +uint32_t Riscv64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the `old_position`. 
Since we expect `GetAdjustedPosition()` to be called in a loop + // with increasing `old_position`, we can use the data from last `GetAdjustedPosition()` to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetLength() - branch->GetOldLength(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Riscv64Assembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + Riscv64Label* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with placeholder data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; ++i) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void Riscv64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta != 0u) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0;) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } + + // Align 64-bit literals by moving them up by 4 bytes if needed. + // This can increase the PC-relative distance but all literals are accessed with AUIPC+Load(imm12) + // without branch promotion, so this late adjustment cannot take them out of instruction range. + if (!long_literals_.empty()) { + uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel()); + size_t lit_size = long_literals_.size() * sizeof(uint64_t); + size_t buf_size = buffer_.Size(); + // 64-bit literals must be at the very end of the buffer. 
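+    // For example, if the instruction stream ends at a 4-byte (but not 8-byte) aligned
+    // position, a single padding word is inserted before the first 64-bit literal below and
+    // every branch target and literal label at or beyond that point is shifted by 4 bytes.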
+ CHECK_EQ(first_literal_location + lit_size, buf_size); + if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) { + // Insert the padding. + buffer_.Resize(buf_size + sizeof(uint32_t)); + buffer_.Move(first_literal_location + sizeof(uint32_t), first_literal_location, lit_size); + DCHECK(!overwriting_); + overwriting_ = true; + overwrite_location_ = first_literal_location; + Emit(0); // Illegal instruction. + overwriting_ = false; + // Increase target addresses in literal and address loads by 4 bytes in order for correct + // offsets from PC to be generated. + for (auto& branch : branches_) { + uint32_t target = branch.GetTarget(); + if (target >= first_literal_location) { + branch.Resolve(target + sizeof(uint32_t)); + } + } + // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal, + // we need to adjust the location of the literal's label as well. + for (Literal& literal : long_literals_) { + // Bound label's position is negative, hence decrementing it instead of incrementing. + literal.GetLabel()->position_ -= sizeof(uint32_t); + } + } + } +} + +void Riscv64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + static constexpr size_t kExtraSpace = 16; // Not every PC advance can be encoded in one byte. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + kExtraSpace); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Riscv64Assembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. + overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + Riscv64Label* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (Riscv64Label* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. + uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + +void Riscv64Assembler::EmitLiterals() { + if (!literals_.empty()) { + for (Literal& literal : literals_) { + Riscv64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 4u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } + if (!long_literals_.empty()) { + // These need to be 8-byte-aligned but we shall add the alignment padding after the branch + // promotion, if needed. 
Since all literals are accessed with AUIPC+Load(imm12) without branch + // promotion, this late adjustment cannot take long literals out of instruction range. + for (Literal& literal : long_literals_) { + Riscv64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + +// This method is used to adjust the base register and offset pair for +// a load/store when the offset doesn't fit into 12-bit signed integer. +void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base, + int32_t& offset, + ScratchRegisterScope& srs) { + // A scratch register must be available for adjustment even if it's not needed. + CHECK_NE(srs.AvailableXRegisters(), 0u); + if (IsInt<12>(offset)) { + return; + } + + constexpr int32_t kPositiveOffsetMaxSimpleAdjustment = 0x7ff; + constexpr int32_t kHighestOffsetForSimpleAdjustment = 2 * kPositiveOffsetMaxSimpleAdjustment; + constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned8 = + RoundDown(kPositiveOffsetMaxSimpleAdjustment, 8); + constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned4 = + RoundDown(kPositiveOffsetMaxSimpleAdjustment, 4); + constexpr int32_t kNegativeOffsetSimpleAdjustment = -0x800; + constexpr int32_t kLowestOffsetForSimpleAdjustment = 2 * kNegativeOffsetSimpleAdjustment; + + XRegister tmp = srs.AllocateXRegister(); + if (offset >= 0 && offset <= kHighestOffsetForSimpleAdjustment) { + // Make the adjustment 8-byte aligned (0x7f8) except for offsets that cannot be reached + // with this adjustment, then try 4-byte alignment, then just half of the offset. + int32_t adjustment = IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned8) + ? kPositiveOffsetSimpleAdjustmentAligned8 + : IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned4) + ? kPositiveOffsetSimpleAdjustmentAligned4 + : offset / 2; + DCHECK(IsInt<12>(adjustment)); + Addi(tmp, base, adjustment); + offset -= adjustment; + } else if (offset < 0 && offset >= kLowestOffsetForSimpleAdjustment) { + Addi(tmp, base, kNegativeOffsetSimpleAdjustment); + offset -= kNegativeOffsetSimpleAdjustment; + } else if (offset >= 0x7ffff800) { + // Support even large offsets outside the range supported by `SplitOffset()`. + LoadConst32(tmp, offset); + Add(tmp, tmp, base); + offset = 0; + } else { + auto [imm20, short_offset] = SplitOffset(offset); + Lui(tmp, imm20); + Add(tmp, tmp, base); + offset = short_offset; + } + base = tmp; +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + // If `rd` differs from `rs1`, allow using it as a temporary if needed. 
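+  // (The load overwrites `rd` anyway, so its old value is not needed and it can serve as an
+  // extra scratch register for the base/offset adjustment, as long as it is not the base.)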
+ if (rd != rs1) { + srs.IncludeXRegister(rd); + } + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rs2) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp) { + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + CHECK_IMPLIES(can_use_tmp, srs.AvailableXRegisters() != 0u); + + // Helper lambdas. + auto addi = [&](XRegister rd, XRegister rs, int32_t imm) { Addi(rd, rs, imm); }; + auto addiw = [&](XRegister rd, XRegister rs, int32_t imm) { Addiw(rd, rs, imm); }; + auto slli = [&](XRegister rd, XRegister rs, int32_t imm) { Slli(rd, rs, imm); }; + auto lui = [&](XRegister rd, uint32_t imm20) { Lui(rd, imm20); }; + + // Simple LUI+ADDI/W can handle value range [-0x80000800, 0x7fffffff]. + auto is_simple_li_value = [](int64_t value) { + return value >= INT64_C(-0x80000800) && value <= INT64_C(0x7fffffff); + }; + auto emit_simple_li_helper = [&](XRegister rd, + int64_t value, + auto&& addi, + auto&& addiw, + auto&& slli, + auto&& lui) { + DCHECK(is_simple_li_value(value)) << "0x" << std::hex << value; + if (IsInt<12>(value)) { + addi(rd, Zero, value); + } else if (CTZ(value) < 12 && IsInt(6 + CTZ(value), value)) { + // This path yields two 16-bit instructions with the "C" Standard Extension. + addi(rd, Zero, value >> CTZ(value)); + slli(rd, rd, CTZ(value)); + } else if (value < INT64_C(-0x80000000)) { + int32_t small_value = dchecked_integral_cast<int32_t>(value - INT64_C(-0x80000000)); + DCHECK(IsInt<12>(small_value)); + DCHECK_LT(small_value, 0); + lui(rd, 1u << 19); + addi(rd, rd, small_value); + } else { + DCHECK(IsInt<32>(value)); + // Note: Similar to `SplitOffset()` but we can target the full 32-bit range with ADDIW. + int64_t near_value = (value + 0x800) & ~0xfff; + int32_t small_value = value - near_value; + DCHECK(IsInt<12>(small_value)); + uint32_t imm20 = static_cast<uint32_t>(near_value) >> 12; + DCHECK_NE(imm20, 0u); // Small values are handled above. 
+ lui(rd, imm20); + if (small_value != 0) { + addiw(rd, rd, small_value); + } + } + }; + auto emit_simple_li = [&](XRegister rd, int64_t value) { + emit_simple_li_helper(rd, value, addi, addiw, slli, lui); + }; + auto count_simple_li_instructions = [&](int64_t value) { + size_t num_instructions = 0u; + auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; }; + auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; }; + emit_simple_li_helper(Zero, value, count_rri, count_rri, count_rri, count_ru); + return num_instructions; + }; + + // If LUI+ADDI/W is not enough, we can generate up to 3 SLLI+ADDI afterwards (up to 8 instructions + // total). The ADDI from the first SLLI+ADDI pair can be a no-op. + auto emit_with_slli_addi_helper = [&](XRegister rd, + int64_t value, + auto&& addi, + auto&& addiw, + auto&& slli, + auto&& lui) { + static constexpr size_t kMaxNumSllAddi = 3u; + int32_t addi_values[kMaxNumSllAddi]; + size_t sll_shamts[kMaxNumSllAddi]; + size_t num_sll_addi = 0u; + while (!is_simple_li_value(value)) { + DCHECK_LT(num_sll_addi, kMaxNumSllAddi); + // Prepare sign-extended low 12 bits for ADDI. + int64_t addi_value = (value & 0xfff) - ((value & 0x800) << 1); + DCHECK(IsInt<12>(addi_value)); + int64_t remaining = value - addi_value; + size_t shamt = CTZ(remaining); + DCHECK_GE(shamt, 12u); + addi_values[num_sll_addi] = addi_value; + sll_shamts[num_sll_addi] = shamt; + value = remaining >> shamt; + ++num_sll_addi; + } + if (num_sll_addi != 0u && IsInt<20>(value) && !IsInt<12>(value)) { + // If `sll_shamts[num_sll_addi - 1u]` was only 12, we would have stopped + // the decomposition a step earlier with smaller `num_sll_addi`. + DCHECK_GT(sll_shamts[num_sll_addi - 1u], 12u); + // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate. + sll_shamts[num_sll_addi - 1u] -= 12u; + lui(rd, dchecked_integral_cast<uint32_t>(value & 0xfffff)); + } else { + emit_simple_li_helper(rd, value, addi, addiw, slli, lui); + } + for (size_t i = num_sll_addi; i != 0u; ) { + --i; + slli(rd, rd, sll_shamts[i]); + if (addi_values[i] != 0) { + addi(rd, rd, addi_values[i]); + } + } + }; + auto emit_with_slli_addi = [&](XRegister rd, int64_t value) { + emit_with_slli_addi_helper(rd, value, addi, addiw, slli, lui); + }; + auto count_instructions_with_slli_addi = [&](int64_t value) { + size_t num_instructions = 0u; + auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; }; + auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; }; + emit_with_slli_addi_helper(Zero, value, count_rri, count_rri, count_rri, count_ru); + return num_instructions; + }; + + size_t insns_needed = count_instructions_with_slli_addi(imm); + size_t trailing_slli_shamt = 0u; + if (insns_needed > 2u) { + // Sometimes it's better to end with a SLLI even when the above code would end with ADDI. + if ((imm & 1) == 0 && (imm & 0xfff) != 0) { + int64_t value = imm >> CTZ(imm); + size_t new_insns_needed = count_instructions_with_slli_addi(value) + /*SLLI*/ 1u; + DCHECK_GT(new_insns_needed, 2u); + if (insns_needed > new_insns_needed) { + insns_needed = new_insns_needed; + trailing_slli_shamt = CTZ(imm); + } + } + + // Sometimes we can emit a shorter sequence that ends with SRLI. + if (imm > 0) { + size_t shamt = CLZ(static_cast<uint64_t>(imm)); + DCHECK_LE(shamt, 32u); // Otherwise we would not get here as `insns_needed` would be <= 2. 
+ if (imm == dchecked_integral_cast<int64_t>(MaxInt<uint64_t>(64 - shamt))) { + Addi(rd, Zero, -1); + Srli(rd, rd, shamt); + return; + } + + int64_t value = static_cast<int64_t>(static_cast<uint64_t>(imm) << shamt); + DCHECK_LT(value, 0); + if (is_simple_li_value(value)){ + size_t new_insns_needed = count_simple_li_instructions(value) + /*SRLI*/ 1u; + // In case of equal number of instructions, clang prefers the sequence without SRLI. + if (new_insns_needed < insns_needed) { + // If we emit ADDI, we set low bits that shall be shifted out to one in line with clang, + // effectively choosing to emit the negative constant closest to zero. + int32_t shifted_out = dchecked_integral_cast<int32_t>(MaxInt<uint32_t>(shamt)); + DCHECK_EQ(value & shifted_out, 0); + emit_simple_li(rd, (value & 0xfff) == 0 ? value : value + shifted_out); + Srli(rd, rd, shamt); + return; + } + } + + size_t ctz = CTZ(static_cast<uint64_t>(value)); + if (IsInt(ctz + 20, value)) { + size_t new_insns_needed = /*ADDI or LUI*/ 1u + /*SLLI*/ 1u + /*SRLI*/ 1u; + if (new_insns_needed < insns_needed) { + // Clang prefers ADDI+SLLI+SRLI over LUI+SLLI+SRLI. + if (IsInt(ctz + 12, value)) { + Addi(rd, Zero, value >> ctz); + Slli(rd, rd, ctz); + } else { + Lui(rd, (static_cast<uint64_t>(value) >> ctz) & 0xfffffu); + Slli(rd, rd, ctz - 12); + } + Srli(rd, rd, shamt); + return; + } + } + } + + // If we can use a scratch register, try using it to emit a shorter sequence. Without a + // scratch reg, the sequence is up to 8 instructions, with a scratch reg only up to 6. + if (can_use_tmp) { + int64_t low = (imm & 0xffffffff) - ((imm & 0x80000000) << 1); + int64_t remainder = imm - low; + size_t slli_shamt = CTZ(remainder); + DCHECK_GE(slli_shamt, 32u); + int64_t high = remainder >> slli_shamt; + size_t new_insns_needed = + ((IsInt<20>(high) || (high & 0xfff) == 0u) ? 1u : 2u) + + count_simple_li_instructions(low) + + /*SLLI+ADD*/ 2u; + if (new_insns_needed < insns_needed) { + DCHECK_NE(low & 0xfffff000, 0); + XRegister tmp = srs.AllocateXRegister(); + if (IsInt<20>(high) && !IsInt<12>(high)) { + // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate. + Lui(rd, static_cast<uint32_t>(high & 0xfffff)); + slli_shamt -= 12; + } else { + emit_simple_li(rd, high); + } + emit_simple_li(tmp, low); + Slli(rd, rd, slli_shamt); + Add(rd, rd, tmp); + return; + } + } + } + emit_with_slli_addi(rd, trailing_slli_shamt != 0u ? imm >> trailing_slli_shamt : imm); + if (trailing_slli_shamt != 0u) { + Slli(rd, rd, trailing_slli_shamt); + } +} + +/////////////////////////////// RV64 VARIANTS extension end //////////// + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h new file mode 100644 index 0000000000..15f2518c87 --- /dev/null +++ b/compiler/utils/riscv64/assembler_riscv64.h @@ -0,0 +1,1178 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_ +#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_ + +#include <cstdint> +#include <string> +#include <utility> +#include <vector> + +#include "arch/riscv64/instruction_set_features_riscv64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/globals.h" +#include "base/macros.h" +#include "managed_register_riscv64.h" +#include "utils/assembler.h" +#include "utils/label.h" + +namespace art HIDDEN { +namespace riscv64 { + +class ScratchRegisterScope; + +static constexpr size_t kRiscv64HalfwordSize = 2; +static constexpr size_t kRiscv64WordSize = 4; +static constexpr size_t kRiscv64DoublewordSize = 8; +static constexpr size_t kRiscv64FloatRegSizeInBytes = 8; + +enum class FPRoundingMode : uint32_t { + kRNE = 0x0, // Round to Nearest, ties to Even + kRTZ = 0x1, // Round towards Zero + kRDN = 0x2, // Round Down (towards −Infinity) + kRUP = 0x3, // Round Up (towards +Infinity) + kRMM = 0x4, // Round to Nearest, ties to Max Magnitude + kDYN = 0x7, // Dynamic rounding mode + kDefault = kDYN, + // Some instructions never need to round even though the spec includes the RM field. + // To simplify testing, emit the RM as 0 by default for these instructions because that's what + // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes. + kIgnored = 0 +}; + +enum class AqRl : uint32_t { + kNone = 0x0, + kRelease = 0x1, + kAcquire = 0x2, + kAqRl = kRelease | kAcquire +}; + +// the type for fence +enum FenceType { + kFenceNone = 0, + kFenceWrite = 1, + kFenceRead = 2, + kFenceOutput = 4, + kFenceInput = 8, + kFenceDefault = 0xf, +}; + +// Used to test the values returned by FClassS/FClassD. +enum FPClassMaskType { + kNegativeInfinity = 0x001, + kNegativeNormal = 0x002, + kNegativeSubnormal = 0x004, + kNegativeZero = 0x008, + kPositiveZero = 0x010, + kPositiveSubnormal = 0x020, + kPositiveNormal = 0x040, + kPositiveInfinity = 0x080, + kSignalingNaN = 0x100, + kQuietNaN = 0x200, +}; + +class Riscv64Label : public Label { + public: + Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {} + + Riscv64Label(Riscv64Label&& src) noexcept + // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move + : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {} + + private: + static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max(); + + uint32_t prev_branch_id_; // To get distance from preceding branch, if any. + + friend class Riscv64Assembler; + DISALLOW_COPY_AND_ASSIGN(Riscv64Label); +}; + +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { return size_; } + + const uint8_t* GetData() const { return data_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + +// Jump table: table of labels emitted after the code and before the literals. Similar to literals. 
+class JumpTable { + public: + explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {} + + size_t GetSize() const { return labels_.size() * sizeof(int32_t); } + + const ArenaVector<Riscv64Label*>& GetData() const { return labels_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + ArenaVector<Riscv64Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + +class Riscv64Assembler final : public Assembler { + public: + explicit Riscv64Assembler(ArenaAllocator* allocator, + const Riscv64InstructionSetFeatures* instruction_set_features = nullptr) + : Assembler(allocator), + branches_(allocator->Adapter(kArenaAllocAssembler)), + finalized_(false), + overwriting_(false), + overwrite_location_(0), + literals_(allocator->Adapter(kArenaAllocAssembler)), + long_literals_(allocator->Adapter(kArenaAllocAssembler)), + jump_tables_(allocator->Adapter(kArenaAllocAssembler)), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0), + available_scratch_core_registers_((1u << TMP) | (1u << TMP2)), + available_scratch_fp_registers_(1u << FTMP) { + UNUSED(instruction_set_features); + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Riscv64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } + + size_t CodeSize() const override { return Assembler::CodeSize(); } + DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + + // According to "The RISC-V Instruction Set Manual" + + // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37 + // Note: These take a 20-bit unsigned value to align with the clang assembler for testing, + // but the value stored in the register shall actually be sign-extended to 64 bits. 
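+  // For example, `Lui(A0, 0x80000)` leaves 0xffffffff80000000 (not 0x80000000) in A0.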
+ void Lui(XRegister rd, uint32_t imm20); + void Auipc(XRegister rd, uint32_t imm20); + + // Jump instructions (RV32I), opcode = 0x67, 0x6f + void Jal(XRegister rd, int32_t offset); + void Jalr(XRegister rd, XRegister rs1, int32_t offset); + + // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7 + void Beq(XRegister rs1, XRegister rs2, int32_t offset); + void Bne(XRegister rs1, XRegister rs2, int32_t offset); + void Blt(XRegister rs1, XRegister rs2, int32_t offset); + void Bge(XRegister rs1, XRegister rs2, int32_t offset); + void Bltu(XRegister rs1, XRegister rs2, int32_t offset); + void Bgeu(XRegister rs1, XRegister rs2, int32_t offset); + + // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6 + void Lb(XRegister rd, XRegister rs1, int32_t offset); + void Lh(XRegister rd, XRegister rs1, int32_t offset); + void Lw(XRegister rd, XRegister rs1, int32_t offset); + void Ld(XRegister rd, XRegister rs1, int32_t offset); + void Lbu(XRegister rd, XRegister rs1, int32_t offset); + void Lhu(XRegister rd, XRegister rs1, int32_t offset); + void Lwu(XRegister rd, XRegister rs1, int32_t offset); + + // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3 + void Sb(XRegister rs2, XRegister rs1, int32_t offset); + void Sh(XRegister rs2, XRegister rs1, int32_t offset); + void Sw(XRegister rs2, XRegister rs1, int32_t offset); + void Sd(XRegister rs2, XRegister rs1, int32_t offset); + + // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7 + void Addi(XRegister rd, XRegister rs1, int32_t imm12); + void Slti(XRegister rd, XRegister rs1, int32_t imm12); + void Sltiu(XRegister rd, XRegister rs1, int32_t imm12); + void Xori(XRegister rd, XRegister rs1, int32_t imm12); + void Ori(XRegister rd, XRegister rs1, int32_t imm12); + void Andi(XRegister rd, XRegister rs1, int32_t imm12); + void Slli(XRegister rd, XRegister rs1, int32_t shamt); + void Srli(XRegister rd, XRegister rs1, int32_t shamt); + void Srai(XRegister rd, XRegister rs1, int32_t shamt); + + // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7 + void Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sub(XRegister rd, XRegister rs1, XRegister rs2); + void Slt(XRegister rd, XRegister rs1, XRegister rs2); + void Sltu(XRegister rd, XRegister rs1, XRegister rs2); + void Xor(XRegister rd, XRegister rs1, XRegister rs2); + void Or(XRegister rd, XRegister rs1, XRegister rs2); + void And(XRegister rd, XRegister rs1, XRegister rs2); + void Sll(XRegister rd, XRegister rs1, XRegister rs2); + void Srl(XRegister rd, XRegister rs1, XRegister rs2); + void Sra(XRegister rd, XRegister rs1, XRegister rs2); + + // 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5 + void Addiw(XRegister rd, XRegister rs1, int32_t imm12); + void Slliw(XRegister rd, XRegister rs1, int32_t shamt); + void Srliw(XRegister rd, XRegister rs1, int32_t shamt); + void Sraiw(XRegister rd, XRegister rs1, int32_t shamt); + + // 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7 + void Addw(XRegister rd, XRegister rs1, XRegister rs2); + void Subw(XRegister rd, XRegister rs1, XRegister rs2); + void Sllw(XRegister rd, XRegister rs1, XRegister rs2); + void Srlw(XRegister rd, XRegister rs1, XRegister rs2); + void Sraw(XRegister rd, XRegister rs1, XRegister rs2); + + // Environment call and breakpoint (RV32I), opcode = 0x73 + void Ecall(); + void Ebreak(); + + // Fence instruction (RV32I): opcode = 0xf, funct3 = 0 + void Fence(uint32_t pred = kFenceDefault, 
uint32_t succ = kFenceDefault); + void FenceTso(); + + // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1 + void FenceI(); + + // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7 + void Mul(XRegister rd, XRegister rs1, XRegister rs2); + void Mulh(XRegister rd, XRegister rs1, XRegister rs2); + void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2); + void Mulhu(XRegister rd, XRegister rs1, XRegister rs2); + void Div(XRegister rd, XRegister rs1, XRegister rs2); + void Divu(XRegister rd, XRegister rs1, XRegister rs2); + void Rem(XRegister rd, XRegister rs1, XRegister rs2); + void Remu(XRegister rd, XRegister rs1, XRegister rs2); + + // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7 + void Mulw(XRegister rd, XRegister rs1, XRegister rs2); + void Divw(XRegister rd, XRegister rs1, XRegister rs2); + void Divuw(XRegister rd, XRegister rs1, XRegister rs2); + void Remw(XRegister rd, XRegister rs1, XRegister rs2); + void Remuw(XRegister rd, XRegister rs1, XRegister rs2); + + // RV32A/RV64A Standard Extension + void LrW(XRegister rd, XRegister rs1, AqRl aqrl); + void LrD(XRegister rd, XRegister rs1, AqRl aqrl); + void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + + // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7 + void Csrrw(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrs(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrc(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5); + void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5); + void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5); + + // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27 + void FLw(FRegister rd, XRegister rs1, int32_t offset); + void FLd(FRegister rd, XRegister rs1, int32_t offset); + void FSw(FRegister rs2, XRegister rs1, int32_t offset); + void FSd(FRegister rs2, XRegister rs1, int32_t offset); + + // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f + void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, 
FPRoundingMode frm); + void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + + // FP FMA instruction helpers passing the default rounding mode. + void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + + // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D + void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2); + void FMinS(FRegister rd, FRegister rs1, FRegister rs2); + void FMinD(FRegister rd, FRegister rs1, FRegister rs2); + void FMaxS(FRegister rd, FRegister rs1, FRegister rs2); + void FMaxD(FRegister rd, FRegister rs1, FRegister rs2); + void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm); + + // Simple FP instruction helpers passing the default rounding mode. 
+ void FAddS(FRegister rd, FRegister rs1, FRegister rs2) { + FAddS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FAddD(FRegister rd, FRegister rs1, FRegister rs2) { + FAddD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSubS(FRegister rd, FRegister rs1, FRegister rs2) { + FSubS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSubD(FRegister rd, FRegister rs1, FRegister rs2) { + FSubD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FMulS(FRegister rd, FRegister rs1, FRegister rs2) { + FMulS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FMulD(FRegister rd, FRegister rs1, FRegister rs2) { + FMulD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FDivS(FRegister rd, FRegister rs1, FRegister rs2) { + FDivS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FDivD(FRegister rd, FRegister rs1, FRegister rs2) { + FDivD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSqrtS(FRegister rd, FRegister rs1) { + FSqrtS(rd, rs1, FPRoundingMode::kDefault); + } + void FSqrtD(FRegister rd, FRegister rs1) { + FSqrtD(rd, rs1, FPRoundingMode::kDefault); + } + void FCvtSD(FRegister rd, FRegister rs1) { + FCvtSD(rd, rs1, FPRoundingMode::kDefault); + } + void FCvtDS(FRegister rd, FRegister rs1) { + FCvtDS(rd, rs1, FPRoundingMode::kIgnored); + } + + // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D + void FEqS(XRegister rd, FRegister rs1, FRegister rs2); + void FEqD(XRegister rd, FRegister rs1, FRegister rs2); + void FLtS(XRegister rd, FRegister rs1, FRegister rs2); + void FLtD(XRegister rd, FRegister rs1, FRegister rs2); + void FLeS(XRegister rd, FRegister rs1, FRegister rs2); + void FLeD(XRegister rd, FRegister rs1, FRegister rs2); + + // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D + void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm); + + // FP conversion instruction helpers passing the default rounding mode. 
+ void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); } + void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); } + void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); } + void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); } + + // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D + void FMvXW(XRegister rd, FRegister rs1); + void FMvXD(XRegister rd, FRegister rs1); + void FMvWX(FRegister rd, XRegister rs1); + void FMvDX(FRegister rd, XRegister rs1); + + // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D + void FClassS(XRegister rd, FRegister rs1); + void FClassD(XRegister rd, FRegister rs1); + + // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies. + void AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void SlliUw(XRegister rd, XRegister rs1, int32_t shamt); + + // "Zbb" Standard Extension, opcode = 0x13, 0x1b or 0x33, funct3 and funct7 varies. + // Note: We do not support 32-bit sext.b, sext.h and zext.h from the Zbb extension. + // (Neither does the clang-r498229's assembler which we currently test against.) 
+ void Andn(XRegister rd, XRegister rs1, XRegister rs2); + void Orn(XRegister rd, XRegister rs1, XRegister rs2); + void Xnor(XRegister rd, XRegister rs1, XRegister rs2); + void Clz(XRegister rd, XRegister rs1); + void Clzw(XRegister rd, XRegister rs1); + void Ctz(XRegister rd, XRegister rs1); + void Ctzw(XRegister rd, XRegister rs1); + void Cpop(XRegister rd, XRegister rs1); + void Cpopw(XRegister rd, XRegister rs1); + void Min(XRegister rd, XRegister rs1, XRegister rs2); + void Minu(XRegister rd, XRegister rs1, XRegister rs2); + void Max(XRegister rd, XRegister rs1, XRegister rs2); + void Maxu(XRegister rd, XRegister rs1, XRegister rs2); + void Rol(XRegister rd, XRegister rs1, XRegister rs2); + void Rolw(XRegister rd, XRegister rs1, XRegister rs2); + void Ror(XRegister rd, XRegister rs1, XRegister rs2); + void Rorw(XRegister rd, XRegister rs1, XRegister rs2); + void Rori(XRegister rd, XRegister rs1, int32_t shamt); + void Roriw(XRegister rd, XRegister rs1, int32_t shamt); + void OrcB(XRegister rd, XRegister rs1); + void Rev8(XRegister rd, XRegister rs1); + + ////////////////////////////// RV64 MACRO Instructions START /////////////////////////////// + // These pseudo instructions are from "RISC-V Assembly Programmer's Manual". + + void Nop(); + void Li(XRegister rd, int64_t imm); + void Mv(XRegister rd, XRegister rs); + void Not(XRegister rd, XRegister rs); + void Neg(XRegister rd, XRegister rs); + void NegW(XRegister rd, XRegister rs); + void SextB(XRegister rd, XRegister rs); + void SextH(XRegister rd, XRegister rs); + void SextW(XRegister rd, XRegister rs); + void ZextB(XRegister rd, XRegister rs); + void ZextH(XRegister rd, XRegister rs); + void ZextW(XRegister rd, XRegister rs); + void Seqz(XRegister rd, XRegister rs); + void Snez(XRegister rd, XRegister rs); + void Sltz(XRegister rd, XRegister rs); + void Sgtz(XRegister rd, XRegister rs); + void FMvS(FRegister rd, FRegister rs); + void FAbsS(FRegister rd, FRegister rs); + void FNegS(FRegister rd, FRegister rs); + void FMvD(FRegister rd, FRegister rs); + void FAbsD(FRegister rd, FRegister rs); + void FNegD(FRegister rd, FRegister rs); + + // Branch pseudo instructions + void Beqz(XRegister rs, int32_t offset); + void Bnez(XRegister rs, int32_t offset); + void Blez(XRegister rs, int32_t offset); + void Bgez(XRegister rs, int32_t offset); + void Bltz(XRegister rs, int32_t offset); + void Bgtz(XRegister rs, int32_t offset); + void Bgt(XRegister rs, XRegister rt, int32_t offset); + void Ble(XRegister rs, XRegister rt, int32_t offset); + void Bgtu(XRegister rs, XRegister rt, int32_t offset); + void Bleu(XRegister rs, XRegister rt, int32_t offset); + + // Jump pseudo instructions + void J(int32_t offset); + void Jal(int32_t offset); + void Jr(XRegister rs); + void Jalr(XRegister rs); + void Jalr(XRegister rd, XRegister rs); + void Ret(); + + // Pseudo instructions for accessing control and status registers + void RdCycle(XRegister rd); + void RdTime(XRegister rd); + void RdInstret(XRegister rd); + void Csrr(XRegister rd, uint32_t csr); + void Csrw(uint32_t csr, XRegister rs); + void Csrs(uint32_t csr, XRegister rs); + void Csrc(uint32_t csr, XRegister rs); + void Csrwi(uint32_t csr, uint32_t uimm5); + void Csrsi(uint32_t csr, uint32_t uimm5); + void Csrci(uint32_t csr, uint32_t uimm5); + + // Load/store macros for arbitrary 32-bit offsets. 
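+  // For example, `Loadw(A0, A1, 0x12345)` cannot be encoded as a single LW, so it expands
+  // roughly to LUI+ADD into a scratch register followed by LW with the remaining low 12 bits
+  // (see `AdjustBaseAndOffset()`).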
+ void Loadb(XRegister rd, XRegister rs1, int32_t offset); + void Loadh(XRegister rd, XRegister rs1, int32_t offset); + void Loadw(XRegister rd, XRegister rs1, int32_t offset); + void Loadd(XRegister rd, XRegister rs1, int32_t offset); + void Loadbu(XRegister rd, XRegister rs1, int32_t offset); + void Loadhu(XRegister rd, XRegister rs1, int32_t offset); + void Loadwu(XRegister rd, XRegister rs1, int32_t offset); + void Storeb(XRegister rs2, XRegister rs1, int32_t offset); + void Storeh(XRegister rs2, XRegister rs1, int32_t offset); + void Storew(XRegister rs2, XRegister rs1, int32_t offset); + void Stored(XRegister rs2, XRegister rs1, int32_t offset); + void FLoadw(FRegister rd, XRegister rs1, int32_t offset); + void FLoadd(FRegister rd, XRegister rs1, int32_t offset); + void FStorew(FRegister rs2, XRegister rs1, int32_t offset); + void FStored(FRegister rs2, XRegister rs1, int32_t offset); + + // Macros for loading constants. + void LoadConst32(XRegister rd, int32_t value); + void LoadConst64(XRegister rd, int64_t value); + + // Macros for adding constants. + void AddConst32(XRegister rd, XRegister rs1, int32_t value); + void AddConst64(XRegister rd, XRegister rs1, int64_t value); + + // Jumps and branches to a label. + void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false); + void J(Riscv64Label* label, bool is_bare = false); + void Jal(Riscv64Label* label, bool is_bare = false); + + // Literal load. + void Loadw(XRegister rd, Literal* literal); + void Loadwu(XRegister rd, Literal* literal); + void Loadd(XRegister rd, Literal* literal); + void FLoadw(FRegister rd, Literal* literal); + void FLoadd(FRegister rd, Literal* literal); + + // Illegal instruction that triggers SIGILL. + void Unimp(); + + /////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// + + void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); } + + void Jump([[maybe_unused]] Label* label) override { + UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64"; + } + + void Bind(Riscv64Label* label); + + // Load label address using PC-relative loads. + void LoadLabelAddress(XRegister rd, Riscv64Label* label); + + // Create a new literal with a given value. 
+ // NOTE:Use `Identity<>` to force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels); + + public: + // Emit slow paths queued during assembly, promote short branches to long if needed, + // and emit branches. + void FinalizeCode() override; + + // Returns the current location of a label. + // + // This function must be used instead of `Riscv64Label::GetPosition()` + // which returns assembler's internal data instead of an actual location. + // + // The location can change during branch fixup in `FinalizeCode()`. Before that, + // the location is not final and therefore not very useful to external users, + // so they should preferably retrieve the location only after `FinalizeCode()`. + uint32_t GetLabelLocation(const Riscv64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + private: + enum BranchCondition : uint8_t { + kCondEQ, + kCondNE, + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTU, + kCondGEU, + kCondLEU, + kCondGTU, + kUncond, + }; + + // Note that PC-relative literal loads are handled as pseudo branches because they need + // to be emitted after branch relocation to use correct offsets. + class Branch { + public: + enum Type : uint8_t { + // TODO(riscv64): Support 16-bit instructions ("C" Standard Extension). + + // Short branches (can be promoted to longer). + kCondBranch, + kUncondBranch, + kCall, + // Short branches (can't be promoted to longer). + // TODO(riscv64): Do we need these (untested) bare branches, or can we remove them? + kBareCondBranch, + kBareUncondBranch, + kBareCall, + + // Medium branch (can be promoted to long). + kCondBranch21, + + // Long branches. + kLongCondBranch, + kLongUncondBranch, + kLongCall, + + // Label. + kLabel, + + // Literals. + kLiteral, + kLiteralUnsigned, + kLiteralLong, + kLiteralFloat, + kLiteralDouble, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset13 = 13, + kOffset21 = 21, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr uint32_t kMaxBranchLength = 12; // In bytes. + + struct BranchInfo { + // Branch length in bytes. + uint32_t length; + // The offset in bytes of the PC used in the (only) PC-relative instruction from + // the start of the branch sequence. RISC-V always uses the address of the PC-relative + // instruction as the PC, so this is essentially the offset of that instruction. + uint32_t pc_offset; + // How large (in bits) a PC-relative offset can be for a given type of branch. + OffsetBits offset_size; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch or call. 
+ Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare); + // Label address or literal. + Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type); + Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. + static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs); + static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + XRegister GetLeftRegister() const; + XRegister GetRightRegister() const; + FRegister GetFRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsBare() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If necessary, updates the type by promoting a short branch to a longer branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + uint32_t PromoteIfNeeded(); + + // Returns the offset into assembler buffer that shall be used as the base PC for + // offset calculation. RISC-V always uses the address of the PC-relative instruction + // as the PC, so this is essentially the location of that instruction. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + int32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(Type initial_type); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + XRegister lhs_reg_; // Left-hand side register in conditional branches or + // destination register in calls or literals. + XRegister rhs_reg_; // Right-hand side register in conditional branches. + FRegister freg_; // Destination register in FP literals. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. + }; + + // Branch and literal fixup. 
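+ //
+ // For example (illustrative): a `Beq(A0, A1, &label)` whose target stays within the 13-bit
+ // B-type range (about ±4 KiB) is emitted as a single `beq`; a farther target promotes it to
+ // an opposite-condition branch over a `j` (21-bit range, about ±1 MiB), and an even farther
+ // one to an opposite-condition branch over an AUIPC+JALR pair.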
+ + void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset); + void EmitBranch(Branch* branch); + void EmitBranches(); + void EmitJumpTables(); + void EmitLiterals(); + + void FinalizeLabeledBranch(Riscv64Label* label); + void Bcond(Riscv64Label* label, + bool is_bare, + BranchCondition condition, + XRegister lhs, + XRegister rhs); + void Buncond(Riscv64Label* label, XRegister rd, bool is_bare); + template <typename XRegisterOrFRegister> + void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type); + + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void ReserveJumpTableSpace(); + void PromoteBranches(); + void PatchCFI(); + + // Emit data (e.g. encoded instruction or immediate) to the instruction stream. + void Emit(uint32_t value); + + // Adjust base register and offset if needed for load/store with a large offset. + void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs); + + // Helper templates for loads/stores with 32-bit offsets. + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset); + + // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`. + void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp); + + // Emit helpers. + + // I-type instruction: + // + // 31 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ imm11:0 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2> + void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) { + DCHECK(IsInt<12>(imm12)) << imm12; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 | + funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // R-type instruction: + // + // 31 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . 
] + // [ funct7 rs2 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2, typename Reg3> + void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) { + DCHECK(IsUint<7>(funct7)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 | + static_cast<uint32_t>(rs1) << 15 | funct3 << 12 | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/ F[N]MSUB): + // + // 31 27 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ rs3 fmt rs2 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2, typename Reg3, typename Reg4> + void EmitR4( + Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) { + DCHECK(IsUint<5>(static_cast<uint32_t>(rs3))); + DCHECK(IsUint<2>(fmt)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 | + opcode; + Emit(encoding); + } + + // S-type instruction: + // + // 31 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ imm11:5 rs2 rs1 funct3 imm4:0 opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2> + void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) { + DCHECK(IsInt<12>(imm12)) << imm12; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | + (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode; + Emit(encoding); + } + + // I-type instruction variant for shifts (SLLI / SRLI / SRAI): + // + // 31 26 25 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . 
] + // [ imm11:6 imm5:0(shamt) rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + void EmitI6(uint32_t funct6, + uint32_t imm6, + XRegister rs1, + uint32_t funct3, + XRegister rd, + uint32_t opcode) { + DCHECK(IsUint<6>(funct6)); + DCHECK(IsUint<6>(imm6)) << imm6; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 | + static_cast<uint32_t>(rs1) << 15 | funct3 << 12 | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // B-type instruction: + // + // 31 30 25 24 20 19 15 14 12 11 8 7 6 0 + // ----------------------------------------------------------------- + // [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ] + // imm12 imm11:5 rs2 rs1 funct3 imm4:1 imm11 opcode ] + // ----------------------------------------------------------------- + void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) { + DCHECK_ALIGNED(offset, 2); + DCHECK(IsInt<13>(offset)) << offset; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<7>(opcode)); + uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu; + uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | + (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode; + Emit(encoding); + } + + // U-type instruction: + // + // 31 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ] + // [ imm31:12 rd opcode ] + // ----------------------------------------------------------------- + void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) { + CHECK(IsUint<20>(imm20)) << imm20; + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // J-type instruction: + // + // 31 30 21 19 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ] + // imm20 imm10:1 imm11 imm19:12 rd opcode ] + // ----------------------------------------------------------------- + void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) { + DCHECK_ALIGNED(offset, 2); + CHECK(IsInt<21>(offset)) << offset; + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu; + uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 | + (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + ArenaVector<Branch> branches_; + + // For checking that we finalize the code only once. + bool finalized_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Use `std::deque<>` for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. 
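+ // (The `Literal*` returned by `NewLiteral()` is referenced by literal loads and by the
+ // branch/literal fixup until `FinalizeCode()`, so element addresses must remain stable.)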
+ ArenaDeque<Literal> literals_;
+ ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons.
+
+ // Jump table list.
+ ArenaDeque<JumpTable> jump_tables_;
+
+ // Data for `GetAdjustedPosition()`, see the description there.
+ uint32_t last_position_adjustment_;
+ uint32_t last_old_position_;
+ uint32_t last_branch_id_;
+
+ uint32_t available_scratch_core_registers_;
+ uint32_t available_scratch_fp_registers_;
+
+ static constexpr uint32_t kXlen = 64;
+
+ friend class ScratchRegisterScope;
+
+ DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
+};
+
+class ScratchRegisterScope {
+ public:
+ explicit ScratchRegisterScope(Riscv64Assembler* assembler)
+ : assembler_(assembler),
+ old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
+ old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}
+
+ ~ScratchRegisterScope() {
+ assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
+ assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
+ }
+
+ // Allocate a scratch `XRegister`. There must be an available register to allocate.
+ XRegister AllocateXRegister() {
+ CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
+ // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_core_registers_);
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ return enum_cast<XRegister>(reg_num);
+ }
+
+ // Free a previously unavailable core register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
+ void FreeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch core registers.
+ size_t AvailableXRegisters() {
+ return POPCOUNT(assembler_->available_scratch_core_registers_);
+ }
+
+ // Make sure a core register is available for use as a scratch register.
+ void IncludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure a core register is not available for use as a scratch register.
+ void ExcludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ }
+
+ // Allocate a scratch `FRegister`. There must be an available register to allocate.
+ FRegister AllocateFRegister() {
+ CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
+ // Allocate the highest available scratch register (same as for core registers).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_fp_registers_);
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ return enum_cast<FRegister>(reg_num);
+ }
+
+ // Free a previously unavailable FP register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `FTMP`.
+ void FreeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch FP registers.
+ size_t AvailableFRegisters() {
+ return POPCOUNT(assembler_->available_scratch_fp_registers_);
+ }
+
+ // Make sure an FP register is available for use as a scratch register.
+ void IncludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure an FP register is not available for use as a scratch register.
+ void ExcludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ }
+
+ private:
+ Riscv64Assembler* const assembler_;
+ const uint32_t old_available_scratch_core_registers_;
+ const uint32_t old_available_scratch_fp_registers_;
+
+ DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
diff --git a/compiler/utils/riscv64/assembler_riscv64_test.cc b/compiler/utils/riscv64/assembler_riscv64_test.cc
new file mode 100644
index 0000000000..0299ac25c5
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64_test.cc
@@ -0,0 +1,2939 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "assembler_riscv64.h" + +#include <inttypes.h> + +#include <map> + +#include "base/bit_utils.h" +#include "utils/assembler_test.h" + +#define __ GetAssembler()-> + +namespace art HIDDEN { +namespace riscv64 { + +struct RISCV64CpuRegisterCompare { + bool operator()(const XRegister& a, const XRegister& b) const { return a < b; } +}; + +class AssemblerRISCV64Test : public AssemblerTest<Riscv64Assembler, + Riscv64Label, + XRegister, + FRegister, + int32_t> { + public: + using Base = AssemblerTest<Riscv64Assembler, + Riscv64Label, + XRegister, + FRegister, + int32_t>; + + AssemblerRISCV64Test() + : instruction_set_features_(Riscv64InstructionSetFeatures::FromVariant("default", nullptr)) {} + + protected: + Riscv64Assembler* CreateAssembler(ArenaAllocator* allocator) override { + return new (allocator) Riscv64Assembler(allocator, instruction_set_features_.get()); + } + + InstructionSet GetIsa() override { return InstructionSet::kRiscv64; } + + // Clang's assembler takes advantage of certain extensions for emitting constants with `li` + // but our assembler does not. For now, we use a simple `-march` to avoid the divergence. + // TODO(riscv64): Implement these more efficient patterns in assembler. + void SetUseSimpleMarch(bool value) { + use_simple_march_ = value; + } + + std::vector<std::string> GetAssemblerCommand() override { + std::vector<std::string> result = Base::GetAssemblerCommand(); + if (use_simple_march_) { + auto it = std::find_if(result.begin(), + result.end(), + [](const std::string& s) { return StartsWith(s, "-march="); }); + CHECK(it != result.end()); + *it = "-march=rv64imafd"; + } + return result; + } + + std::vector<std::string> GetDisassemblerCommand() override { + std::vector<std::string> result = Base::GetDisassemblerCommand(); + if (use_simple_march_) { + auto it = std::find_if(result.begin(), + result.end(), + [](const std::string& s) { return StartsWith(s, "--mattr="); }); + CHECK(it != result.end()); + *it = "--mattr=+F,+D,+A"; + } + return result; + } + + void SetUpHelpers() override { + if (secondary_register_names_.empty()) { + secondary_register_names_.emplace(Zero, "zero"); + secondary_register_names_.emplace(RA, "ra"); + secondary_register_names_.emplace(SP, "sp"); + secondary_register_names_.emplace(GP, "gp"); + secondary_register_names_.emplace(TP, "tp"); + secondary_register_names_.emplace(T0, "t0"); + secondary_register_names_.emplace(T1, "t1"); + secondary_register_names_.emplace(T2, "t2"); + secondary_register_names_.emplace(S0, "s0"); // s0/fp + secondary_register_names_.emplace(S1, "s1"); + secondary_register_names_.emplace(A0, "a0"); + secondary_register_names_.emplace(A1, "a1"); + secondary_register_names_.emplace(A2, "a2"); + secondary_register_names_.emplace(A3, "a3"); + secondary_register_names_.emplace(A4, "a4"); + secondary_register_names_.emplace(A5, "a5"); + secondary_register_names_.emplace(A6, "a6"); + secondary_register_names_.emplace(A7, "a7"); + secondary_register_names_.emplace(S2, "s2"); + secondary_register_names_.emplace(S3, "s3"); + secondary_register_names_.emplace(S4, "s4"); + secondary_register_names_.emplace(S5, "s5"); + secondary_register_names_.emplace(S6, "s6"); + secondary_register_names_.emplace(S7, "s7"); + secondary_register_names_.emplace(S8, "s8"); + secondary_register_names_.emplace(S9, "s9"); + secondary_register_names_.emplace(S10, "s10"); + secondary_register_names_.emplace(S11, "s11"); + secondary_register_names_.emplace(T3, "t3"); + secondary_register_names_.emplace(T4, "t4"); + 
secondary_register_names_.emplace(T5, "t5"); + secondary_register_names_.emplace(T6, "t6"); + } + } + + void TearDown() override { + AssemblerTest::TearDown(); + } + + std::vector<Riscv64Label> GetAddresses() override { + UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; + UNREACHABLE(); + } + + ArrayRef<const XRegister> GetRegisters() override { + static constexpr XRegister kXRegisters[] = { + Zero, + RA, + SP, + GP, + TP, + T0, + T1, + T2, + S0, + S1, + A0, + A1, + A2, + A3, + A4, + A5, + A6, + A7, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + T3, + T4, + T5, + T6, + }; + return ArrayRef<const XRegister>(kXRegisters); + } + + ArrayRef<const FRegister> GetFPRegisters() override { + static constexpr FRegister kFRegisters[] = { + FT0, + FT1, + FT2, + FT3, + FT4, + FT5, + FT6, + FT7, + FS0, + FS1, + FA0, + FA1, + FA2, + FA3, + FA4, + FA5, + FA6, + FA7, + FS2, + FS3, + FS4, + FS5, + FS6, + FS7, + FS8, + FS9, + FS10, + FS11, + FT8, + FT9, + FT10, + FT11, + }; + return ArrayRef<const FRegister>(kFRegisters); + } + + std::string GetSecondaryRegisterName(const XRegister& reg) override { + CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); + return secondary_register_names_[reg]; + } + + int32_t CreateImmediate(int64_t imm_value) override { + return dchecked_integral_cast<int32_t>(imm_value); + } + + template <typename Emit> + std::string RepeatInsn(size_t count, const std::string& insn, Emit&& emit) { + std::string result; + for (; count != 0u; --count) { + result += insn; + emit(); + } + return result; + } + + std::string EmitNops(size_t size) { + // TODO(riscv64): Support "C" Standard Extension. + DCHECK_ALIGNED(size, sizeof(uint32_t)); + const size_t num_nops = size / sizeof(uint32_t); + return RepeatInsn(num_nops, "nop\n", [&]() { __ Nop(); }); + } + + template <typename EmitLoadConst> + void TestLoadConst64(const std::string& test_name, + bool can_use_tmp, + EmitLoadConst&& emit_load_const) { + std::string expected; + // Test standard immediates. Unlike other instructions, `Li()` accepts an `int64_t` but + // this is unsupported by `CreateImmediate()`, so we cannot use `RepeatRIb()` for these. + // Note: This `CreateImmediateValuesBits()` call does not produce any values where + // `LoadConst64()` would emit different code from `Li()`. + for (int64_t value : CreateImmediateValuesBits(64, /*as_uint=*/ false)) { + emit_load_const(A0, value); + expected += "li a0, " + std::to_string(value) + "\n"; + } + // Test various registers with a few small values. + // (Even Zero is an accepted register even if that does not really load the requested value.) + for (XRegister reg : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(reg); + std::string rd = GetRegisterName(reg); + emit_load_const(reg, -1); + expected += "li " + rd + ", -1\n"; + emit_load_const(reg, 0); + expected += "li " + rd + ", 0\n"; + emit_load_const(reg, 1); + expected += "li " + rd + ", 1\n"; + } + // Test some significant values. Some may just repeat the tests above but other values + // show some complex patterns, even exposing a value where clang (and therefore also this + // assembler) does not generate the shortest sequence. + // For the following values, `LoadConst64()` emits the same code as `Li()`. + int64_t test_values1[] = { + // Small values, either ADDI, ADDI+SLLI, LUI, or LUI+ADDIW. + // The ADDI+LUI is presumably used to allow shorter code for RV64C. 
+ -4097, -4096, -4095, -2176, -2049, -2048, -2047, -1025, -1024, -1023, -2, -1, + 0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049, 2176, 4095, 4096, 4097, + // Just below std::numeric_limits<int32_t>::min() + INT64_C(-0x80000001), // LUI+ADDI + INT64_C(-0x80000800), // LUI+ADDI + INT64_C(-0x80000801), // LUI+ADDIW+SLLI+ADDI; LUI+ADDI+ADDI would be shorter. + INT64_C(-0x80000800123), // LUI+ADDIW+SLLI+ADDI + INT64_C(0x0123450000000123), // LUI+SLLI+ADDI + INT64_C(-0x7654300000000123), // LUI+SLLI+ADDI + INT64_C(0x0fffffffffff0000), // LUI+SRLI + INT64_C(0x0ffffffffffff000), // LUI+SRLI + INT64_C(0x0ffffffffffff010), // LUI+ADDIW+SRLI + INT64_C(0x0fffffffffffff10), // ADDI+SLLI+ADDI; LUI+ADDIW+SRLI would be same length. + INT64_C(0x0fffffffffffff80), // ADDI+SRLI + INT64_C(0x0ffffffff7ffff80), // LUI+ADDI+SRLI + INT64_C(0x0123450000001235), // LUI+SLLI+ADDI+SLLI+ADDI + INT64_C(0x0123450000001234), // LUI+SLLI+ADDI+SLLI + INT64_C(0x0000000fff808010), // LUI+SLLI+SRLI + INT64_C(0x00000000fff80801), // LUI+SLLI+SRLI + INT64_C(0x00000000ffffffff), // ADDI+SRLI + INT64_C(0x00000001ffffffff), // ADDI+SRLI + INT64_C(0x00000003ffffffff), // ADDI+SRLI + INT64_C(0x00000000ffc00801), // LUI+ADDIW+SLLI+ADDI + INT64_C(0x00000001fffff7fe), // ADDI+SLLI+SRLI + }; + for (int64_t value : test_values1) { + emit_load_const(A0, value); + expected += "li a0, " + std::to_string(value) + "\n"; + } + // For the following values, `LoadConst64()` emits different code than `Li()`. + std::pair<int64_t, const char*> test_values2[] = { + // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI + // LoadConst: LUI+ADDIW+LUI+ADDIW+SLLI+ADD (using TMP) + { INT64_C(0x1234567812345678), + "li {reg1}, 0x12345678 / 8\n" // Trailing zero bits in high word are handled by SLLI. + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 32 + 3\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x1234567887654321), + "li {reg1}, 0x12345678 + 1\n" // One higher to compensate for negative TMP. + "li {reg2}, 0x87654321 - 0x100000000\n" + "slli {reg1}, {reg1}, 32\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(-0x1234567887654321), + "li {reg1}, -0x12345678 - 1\n" // High 32 bits of the constant. + "li {reg2}, 0x100000000 - 0x87654321\n" // Low 32 bits of the constant. 
+ "slli {reg1}, {reg1}, 32\n" + "add {reg1}, {reg1}, {reg2}\n" }, + + // Li: LUI+SLLI+ADDI+SLLI+ADDI+SLLI + // LoadConst: LUI+LUI+SLLI+ADD (using TMP) + { INT64_C(0x1234500012345000), + "lui {reg1}, 0x12345\n" + "lui {reg2}, 0x12345\n" + "slli {reg1}, {reg1}, 44 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x0123450012345000), + "lui {reg1}, 0x12345\n" + "lui {reg2}, 0x12345\n" + "slli {reg1}, {reg1}, 40 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + + // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI + // LoadConst: LUI+LUI+ADDIW+SLLI+ADD (using TMP) + { INT64_C(0x0001234512345678), + "lui {reg1}, 0x12345\n" + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 32 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x0012345012345678), + "lui {reg1}, 0x12345\n" + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 36 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + }; + for (auto [value, fmt] : test_values2) { + emit_load_const(A0, value); + if (can_use_tmp) { + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(A0), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(TMP), &base); + expected += base; + } else { + expected += "li a0, " + std::to_string(value) + "\n"; + } + } + + DriverStr(expected, test_name); + } + + auto GetPrintBcond() { + return [](const std::string& cond, + [[maybe_unused]] const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + cond + args + ", " + target + "\n"; + }; + } + + auto GetPrintBcondOppositeAndJ(const std::string& skip_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + "j " + target + "\n" + + skip_label + ":\n"; + }; + } + + auto GetPrintBcondOppositeAndTail(const std::string& skip_label, const std::string& base_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n" + + skip_label + ":\n"; + }; + } + + // Helper function for basic tests that all branch conditions map to the correct opcodes, + // whether with branch expansion (a conditional branch with opposite condition over an + // unconditional branch) or without. 
+ template <typename PrintBcond> + std::string EmitBcondForAllConditions(Riscv64Label* label, + const std::string& target, + PrintBcond&& print_bcond) { + XRegister rs = A0; + __ Beqz(rs, label); + __ Bnez(rs, label); + __ Blez(rs, label); + __ Bgez(rs, label); + __ Bltz(rs, label); + __ Bgtz(rs, label); + XRegister rt = A1; + __ Beq(rs, rt, label); + __ Bne(rs, rt, label); + __ Ble(rs, rt, label); + __ Bge(rs, rt, label); + __ Blt(rs, rt, label); + __ Bgt(rs, rt, label); + __ Bleu(rs, rt, label); + __ Bgeu(rs, rt, label); + __ Bltu(rs, rt, label); + __ Bgtu(rs, rt, label); + + return + print_bcond("eq", "ne", "z a0", target) + + print_bcond("ne", "eq", "z a0", target) + + print_bcond("le", "gt", "z a0", target) + + print_bcond("ge", "lt", "z a0", target) + + print_bcond("lt", "ge", "z a0", target) + + print_bcond("gt", "le", "z a0", target) + + print_bcond("eq", "ne", " a0, a1", target) + + print_bcond("ne", "eq", " a0, a1", target) + + print_bcond("le", "gt", " a0, a1", target) + + print_bcond("ge", "lt", " a0, a1", target) + + print_bcond("lt", "ge", " a0, a1", target) + + print_bcond("gt", "le", " a0, a1", target) + + print_bcond("leu", "gtu", " a0, a1", target) + + print_bcond("geu", "ltu", " a0, a1", target) + + print_bcond("ltu", "geu", " a0, a1", target) + + print_bcond("gtu", "leu", " a0, a1", target); + } + + // Test Bcond for forward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondForward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + expected += EmitBcondForAllConditions(&label, target_label + "f", print_bcond); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + // Test Bcond for backward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondBackward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(gap_size); + expected += EmitBcondForAllConditions(&label, target_label + "b", print_bcond); + DriverStr(expected, test_name); + } + + size_t MaxOffset13BackwardDistance() { + return 4 * KB; + } + + size_t MaxOffset13ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 4KiB - 2. + return 4 * KB - 4; + } + + size_t MaxOffset21BackwardDistance() { + return 1 * MB; + } + + size_t MaxOffset21ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 1MiB - 2. 
+ return 1 * MB - 4; + } + + template <typename PrintBcond> + void TestBeqA0A1Forward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Beq(A0, A1, &label); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "f"); + expected += EmitNops(nops_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintBcond> + void TestBeqA0A1Backward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(nops_size); + __ Beq(A0, A1, &label); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "b"); + DriverStr(expected, test_name); + } + + // Test a branch setup where expanding one branch causes expanding another branch + // which causes expanding another branch, etc. The argument `cascade` determines + // whether we push the first branch to expand, or not. + template <typename PrintBcond> + void TestBeqA0A1MaybeCascade(const std::string& test_name, + bool cascade, + PrintBcond&& print_bcond) { + const size_t kNumBeqs = MaxOffset13ForwardDistance() / sizeof(uint32_t) / 2u; + auto label_name = [](size_t i) { return ".L" + std::to_string(i); }; + + std::string expected; + std::vector<Riscv64Label> labels(kNumBeqs); + for (size_t i = 0; i != kNumBeqs; ++i) { + __ Beq(A0, A1, &labels[i]); + expected += print_bcond("eq", "ne", " a0, a1", label_name(i)); + } + if (cascade) { + expected += EmitNops(sizeof(uint32_t)); + } + for (size_t i = 0; i != kNumBeqs; ++i) { + expected += EmitNops(2 * sizeof(uint32_t)); + __ Bind(&labels[i]); + expected += label_name(i) + ":\n"; + } + DriverStr(expected, test_name); + } + + auto GetPrintJalRd() { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + return "jal " + rd_name + ", " + target + "\n"; + }; + } + + auto GetPrintCallRd(const std::string& base_label) { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + std::string temp_name = (rd != Zero) ? 
rd_name : GetRegisterName(TMP); + return base_label + ":\n" + + "auipc " + temp_name + ", %pcrel_hi(" + target + ")\n" + + "jalr " + rd_name + ", %pcrel_lo(" + base_label + "b)(" + temp_name + ")\n"; + }; + } + + template <typename PrintJalRd> + void TestJalRdForward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + PrintJalRd&& print_jalrd) { + std::string expected; + Riscv64Label label; + for (XRegister reg : GetRegisters()) { + __ Jal(reg, &label); + expected += print_jalrd(reg, label_name + "f"); + } + expected += EmitNops(gap_size); + __ Bind(&label); + expected += label_name + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintJalRd> + void TestJalRdBackward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + PrintJalRd&& print_jalrd) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += label_name + ":\n"; + expected += EmitNops(gap_size); + for (XRegister reg : GetRegisters()) { + __ Jal(reg, &label); + expected += print_jalrd(reg, label_name + "b"); + } + DriverStr(expected, test_name); + } + + auto GetEmitJ() { + return [=](Riscv64Label* label) { __ J(label); }; + } + + auto GetEmitJal() { + return [=](Riscv64Label* label) { __ Jal(label); }; + } + + auto GetPrintJ() { + return [=](const std::string& target) { + return "j " + target + "\n"; + }; + } + + auto GetPrintJal() { + return [=](const std::string& target) { + return "jal " + target + "\n"; + }; + } + + auto GetPrintTail(const std::string& base_label) { + return [=](const std::string& target) { + return base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n"; + }; + } + + auto GetPrintCall(const std::string& base_label) { + return [=](const std::string& target) { + return base_label + ":\n" + + "auipc ra, %pcrel_hi(" + target + ")\n" + + "jalr ra, %pcrel_lo(" + base_label + "b)(ra)\n"; + }; + } + + template <typename EmitBuncond, typename PrintBuncond> + void TestBuncondForward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + EmitBuncond&& emit_buncond, + PrintBuncond&& print_buncond) { + std::string expected; + Riscv64Label label; + emit_buncond(&label); + expected += print_buncond(label_name + "f"); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += label_name + ":\n"; + DriverStr(expected, test_name); + } + + template <typename EmitBuncond, typename PrintBuncond> + void TestBuncondBackward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + EmitBuncond&& emit_buncond, + PrintBuncond&& print_buncond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += label_name + ":\n"; + expected += EmitNops(gap_size); + emit_buncond(&label); + expected += print_buncond(label_name + "b"); + DriverStr(expected, test_name); + } + + template <typename EmitOp> + void TestAddConst(const std::string& test_name, + size_t bits, + const std::string& suffix, + EmitOp&& emit_op) { + int64_t kImm12s[] = { + 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, + -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, + }; + int64_t kSimplePositiveValues[] = { + 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff7, 0xff8, 0xffb, 0xffc, 0xffd, 0xffe, + }; + int64_t kSimpleNegativeValues[] = { + -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000, + }; + std::vector<int64_t> large_values = CreateImmediateValuesBits(bits, /*as_uint=*/ false); + 
auto kept_end = std::remove_if(large_values.begin(), + large_values.end(), + [](int64_t value) { return IsInt<13>(value); }); + large_values.erase(kept_end, large_values.end()); + large_values.push_back(0xfff); + + std::string expected; + for (XRegister rd : GetRegisters()) { + std::string rd_name = GetRegisterName(rd); + std::string addi_rd = ART_FORMAT("addi{} {}, ", suffix, rd_name); + std::string add_rd = ART_FORMAT("add{} {}, ", suffix, rd_name); + for (XRegister rs1 : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rs1); + srs.ExcludeXRegister(rd); + + std::string rs1_name = GetRegisterName(rs1); + std::string tmp_name = GetRegisterName((rs1 != TMP) ? TMP : TMP2); + std::string addi_tmp = ART_FORMAT("addi{} {}, ", suffix, tmp_name); + + for (int64_t imm : kImm12s) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("{}{}, {}\n", addi_rd, rs1_name, std::to_string(imm)); + } + + auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) { + for (int64_t imm : imms) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("{}{}, {}\n", addi_tmp, rs1_name, std::to_string(adjustment)); + expected += + ART_FORMAT("{}{}, {}\n", addi_rd, tmp_name, std::to_string(imm - adjustment)); + } + }; + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveValues), 0x7ff); + emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeValues), -0x800); + + for (int64_t imm : large_values) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("li {}, {}\n", tmp_name, std::to_string(imm)); + expected += ART_FORMAT("{}{}, {}\n", add_rd, rs1_name, tmp_name); + } + } + } + DriverStr(expected, test_name); + } + + template <typename GetTemp, typename EmitOp> + std::string RepeatLoadStoreArbitraryOffset(const std::string& head, + GetTemp&& get_temp, + EmitOp&& emit_op) { + int64_t kImm12s[] = { + 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, + -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, + }; + int64_t kSimplePositiveOffsetsAlign8[] = { + 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff4, 0xff6, 0xff7 + }; + int64_t kSimplePositiveOffsetsAlign4[] = { + 0xff8, 0xff9, 0xffa, 0xffb + }; + int64_t kSimplePositiveOffsetsAlign2[] = { + 0xffc, 0xffd + }; + int64_t kSimplePositiveOffsetsNoAlign[] = { + 0xffe + }; + int64_t kSimpleNegativeOffsets[] = { + -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000, + }; + int64_t kSplitOffsets[] = { + 0xfff, 0x1000, 0x1001, 0x17ff, 0x1800, 0x1fff, 0x2000, 0x2001, 0x27ff, 0x2800, + 0x7fffe7ff, 0x7fffe800, 0x7fffefff, 0x7ffff000, 0x7ffff001, 0x7ffff7ff, + -0x1001, -0x1002, -0x17ff, -0x1800, -0x1801, -0x2000, -0x2001, -0x2800, -0x2801, + -0x7ffff000, -0x7ffff001, -0x7ffff800, -0x7ffff801, -0x7fffffff, -0x80000000, + }; + int64_t kSpecialOffsets[] = { + 0x7ffff800, 0x7ffff801, 0x7ffffffe, 0x7fffffff + }; + + std::string expected; + for (XRegister rs1 : GetRegisters()) { + XRegister tmp = get_temp(rs1); + if (tmp == kNoXRegister) { + continue; // Unsupported register combination. 
+ } + std::string tmp_name = GetRegisterName(tmp); + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rs1); + std::string rs1_name = GetRegisterName(rs1); + + for (int64_t imm : kImm12s) { + emit_op(rs1, imm); + expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm), rs1_name); + } + + auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) { + for (int64_t imm : imms) { + emit_op(rs1, imm); + expected += + ART_FORMAT("addi {}, {}, {}\n", tmp_name, rs1_name, std::to_string(adjustment)); + expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm - adjustment), tmp_name); + } + }; + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign8), 0x7f8); + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign4), 0x7fc); + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign2), 0x7fe); + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsNoAlign), 0x7ff); + emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeOffsets), -0x800); + + for (int64_t imm : kSplitOffsets) { + emit_op(rs1, imm); + uint32_t imm20 = ((imm >> 12) + ((imm >> 11) & 1)) & 0xfffff; + int32_t small_offset = (imm & 0xfff) - ((imm & 0x800) << 1); + expected += ART_FORMAT("lui {}, {}\n", tmp_name, std::to_string(imm20)); + expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name); + expected += ART_FORMAT("{},{}({})\n", head, std::to_string(small_offset), tmp_name); + } + + for (int64_t imm : kSpecialOffsets) { + emit_op(rs1, imm); + expected += ART_FORMAT("lui {}, 0x80000\n", tmp_name); + expected += + ART_FORMAT("addiw {}, {}, {}\n", tmp_name, tmp_name, std::to_string(imm - 0x80000000)); + expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name); + expected += ART_FORMAT("{}, ({})\n", head, tmp_name); + } + } + return expected; + } + + void TestLoadStoreArbitraryOffset(const std::string& test_name, + const std::string& insn, + void (Riscv64Assembler::*fn)(XRegister, XRegister, int32_t), + bool is_store) { + std::string expected; + for (XRegister rd : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rd); + auto get_temp = [&](XRegister rs1) { + if (is_store) { + return (rs1 != TMP && rd != TMP) + ? TMP + : (rs1 != TMP2 && rd != TMP2) ? TMP2 : kNoXRegister; + } else { + return rs1 != TMP ? TMP : TMP2; + } + }; + expected += RepeatLoadStoreArbitraryOffset( + insn + " " + GetRegisterName(rd), + get_temp, + [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); }); + } + DriverStr(expected, test_name); + } + + void TestFPLoadStoreArbitraryOffset(const std::string& test_name, + const std::string& insn, + void (Riscv64Assembler::*fn)(FRegister, XRegister, int32_t)) { + std::string expected; + for (FRegister rd : GetFPRegisters()) { + expected += RepeatLoadStoreArbitraryOffset( + insn + " " + GetFPRegName(rd), + [&](XRegister rs1) { return rs1 != TMP ? 
TMP : TMP2; }, + [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); }); + } + DriverStr(expected, test_name); + } + + void TestLoadLiteral(const std::string& test_name, bool with_padding_for_long) { + std::string expected; + Literal* narrow_literal = __ NewLiteral<uint32_t>(0x12345678); + Literal* wide_literal = __ NewLiteral<uint64_t>(0x1234567887654321); + auto print_load = [&](const std::string& load, XRegister rd, const std::string& label) { + std::string rd_name = GetRegisterName(rd); + expected += "1:\n" + "auipc " + rd_name + ", %pcrel_hi(" + label + "f)\n" + + load + " " + rd_name + ", %pcrel_lo(1b)(" + rd_name + ")\n"; + }; + for (XRegister reg : GetRegisters()) { + if (reg != Zero) { + __ Loadw(reg, narrow_literal); + print_load("lw", reg, "2"); + __ Loadwu(reg, narrow_literal); + print_load("lwu", reg, "2"); + __ Loadd(reg, wide_literal); + print_load("ld", reg, "3"); + } + } + std::string tmp = GetRegisterName(TMP); + auto print_fp_load = [&](const std::string& load, FRegister rd, const std::string& label) { + std::string rd_name = GetFPRegName(rd); + expected += "1:\n" + "auipc " + tmp + ", %pcrel_hi(" + label + "f)\n" + + load + " " + rd_name + ", %pcrel_lo(1b)(" + tmp + ")\n"; + }; + for (FRegister freg : GetFPRegisters()) { + __ FLoadw(freg, narrow_literal); + print_fp_load("flw", freg, "2"); + __ FLoadd(freg, wide_literal); + print_fp_load("fld", freg, "3"); + } + // All literal loads above emit 8 bytes of code. The narrow literal shall emit 4 bytes of code. + // If we do not add another instruction, we shall end up with padding before the long literal. + expected += EmitNops(with_padding_for_long ? 0u : sizeof(uint32_t)); + expected += "2:\n" + ".4byte 0x12345678\n" + + std::string(with_padding_for_long ? 
".4byte 0\n" : "") + + "3:\n" + ".8byte 0x1234567887654321\n"; + DriverStr(expected, test_name); + } + + std::string RepeatFFFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (FRegister reg1 : GetFPRegisters()) { + for (FRegister reg2 : GetFPRegisters()) { + for (FRegister reg3 : GetFPRegisters()) { + for (FRegister reg4 : GetFPRegisters()) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, reg3, reg4, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base); + ReplaceReg(REG4_TOKEN, GetFPRegName(reg4), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + } + } + return str; + } + + std::string RepeatFFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (FRegister reg1 : GetFPRegisters()) { + for (FRegister reg2 : GetFPRegisters()) { + for (FRegister reg3 : GetFPRegisters()) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, reg3, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + } + return str; + } + + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersRoundingMode( + void (Riscv64Assembler::*f)(Reg1, Reg2, FPRoundingMode), + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + std::string (Base::*GetName1)(const Reg1&), + std::string (Base::*GetName2)(const Reg2&), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (Reg1 reg1 : reg1_registers) { + for (Reg2 reg2 : reg2_registers) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + std::string RepeatFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerRISCV64Test::GetFPRegName, + &AssemblerRISCV64Test::GetFPRegName, + fmt); + } + + std::string RepeatrFRoundingMode( + void (Riscv64Assembler::*f)(XRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetRegisters(), + GetFPRegisters(), + &Base::GetSecondaryRegisterName, + &AssemblerRISCV64Test::GetFPRegName, + fmt); + } + + std::string RepeatFrRoundingMode( + void (Riscv64Assembler::*f)(FRegister, XRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetFPRegisters(), + GetRegisters(), + &AssemblerRISCV64Test::GetFPRegName, + &Base::GetSecondaryRegisterName, + fmt); + } + + template <typename InvalidAqRl> + std::string RepeatRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, AqRl), + const std::string& fmt, + InvalidAqRl&& invalid_aqrl) { + CHECK(f != nullptr); + std::string 
str; + for (XRegister reg1 : GetRegisters()) { + for (XRegister reg2 : GetRegisters()) { + for (AqRl aqrl : kAqRls) { + if (invalid_aqrl(aqrl)) { + continue; + } + (GetAssembler()->*f)(reg1, reg2, aqrl); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + ReplaceAqRl(aqrl, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + template <typename InvalidAqRl> + std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl), + const std::string& fmt, + InvalidAqRl&& invalid_aqrl) { + CHECK(f != nullptr); + std::string str; + for (XRegister reg1 : GetRegisters()) { + for (XRegister reg2 : GetRegisters()) { + for (XRegister reg3 : GetRegisters()) { + for (AqRl aqrl : kAqRls) { + if (invalid_aqrl(aqrl)) { + continue; + } + (GetAssembler()->*f)(reg1, reg2, reg3, aqrl); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetRegisterName(reg3), &base); + ReplaceAqRl(aqrl, &base); + str += base; + str += "\n"; + } + } + } + } + return str; + } + + std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl), + const std::string& fmt) { + return RepeatRRRAqRl(f, fmt, [](AqRl) { return false; }); + } + + std::string RepeatCsrrX(void (Riscv64Assembler::*f)(XRegister, uint32_t, XRegister), + const std::string& fmt) { + CHECK(f != nullptr); + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::string str; + for (XRegister reg1 : GetRegisters()) { + for (int64_t csr : csrs) { + for (XRegister reg2 : GetRegisters()) { + (GetAssembler()->*f)(reg1, dchecked_integral_cast<uint32_t>(csr), reg2); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + std::string RepeatCsrrXi(void (Riscv64Assembler::*f)(XRegister, uint32_t, uint32_t), + const std::string& fmt) { + CHECK(f != nullptr); + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true); + std::string str; + for (XRegister reg : GetRegisters()) { + for (int64_t csr : csrs) { + for (int64_t uimm : uimms) { + (GetAssembler()->*f)( + reg, dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm)); + + std::string base = fmt; + ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceCsrrImm(UIMM_TOKEN, uimm, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + template <typename EmitCssrX> + void TestCsrrXMacro(const std::string& test_name, + const std::string& fmt, + EmitCssrX&& emit_csrrx) { + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::string expected; + for (XRegister reg : GetRegisters()) { + for (int64_t csr : csrs) { + emit_csrrx(dchecked_integral_cast<uint32_t>(csr), reg); + + std::string base = fmt; + ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + expected += base; + expected += "\n"; + } + } + DriverStr(expected, test_name); + } + + template <typename EmitCssrXi> + void TestCsrrXiMacro(const std::string& test_name, + const std::string& fmt, + EmitCssrXi&& emit_csrrxi) { + 
std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true); + std::string expected; + for (int64_t csr : csrs) { + for (int64_t uimm : uimms) { + emit_csrrxi(dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm)); + + std::string base = fmt; + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceCsrrImm(UIMM_TOKEN, uimm, &base); + expected += base; + expected += "\n"; + } + } + DriverStr(expected, test_name); + } + + private: + static constexpr const char* RM_TOKEN = "{rm}"; + static constexpr const char* AQRL_TOKEN = "{aqrl}"; + static constexpr const char* CSR_TOKEN = "{csr}"; + static constexpr const char* UIMM_TOKEN = "{uimm}"; + + static constexpr AqRl kAqRls[] = { AqRl::kNone, AqRl::kRelease, AqRl::kAcquire, AqRl::kAqRl }; + + static constexpr FPRoundingMode kRoundingModes[] = { + FPRoundingMode::kRNE, + FPRoundingMode::kRTZ, + FPRoundingMode::kRDN, + FPRoundingMode::kRUP, + FPRoundingMode::kRMM, + FPRoundingMode::kDYN + }; + + void ReplaceRoundingMode(FPRoundingMode rm, /*inout*/ std::string* str) { + const char* replacement; + switch (rm) { + case FPRoundingMode::kRNE: + replacement = "rne"; + break; + case FPRoundingMode::kRTZ: + replacement = "rtz"; + break; + case FPRoundingMode::kRDN: + replacement = "rdn"; + break; + case FPRoundingMode::kRUP: + replacement = "rup"; + break; + case FPRoundingMode::kRMM: + replacement = "rmm"; + break; + case FPRoundingMode::kDYN: + replacement = "dyn"; + break; + default: + LOG(FATAL) << "Unexpected value for rm: " << enum_cast<uint32_t>(rm); + UNREACHABLE(); + } + size_t rm_index = str->find(RM_TOKEN); + EXPECT_NE(rm_index, std::string::npos); + if (rm_index != std::string::npos) { + str->replace(rm_index, ConstexprStrLen(RM_TOKEN), replacement); + } + } + + void ReplaceAqRl(AqRl aqrl, /*inout*/ std::string* str) { + const char* replacement; + switch (aqrl) { + case AqRl::kNone: + replacement = ""; + break; + case AqRl::kRelease: + replacement = ".rl"; + break; + case AqRl::kAcquire: + replacement = ".aq"; + break; + case AqRl::kAqRl: + replacement = ".aqrl"; + break; + default: + LOG(FATAL) << "Unexpected value for `aqrl`: " << enum_cast<uint32_t>(aqrl); + UNREACHABLE(); + } + size_t aqrl_index = str->find(AQRL_TOKEN); + EXPECT_NE(aqrl_index, std::string::npos); + if (aqrl_index != std::string::npos) { + str->replace(aqrl_index, ConstexprStrLen(AQRL_TOKEN), replacement); + } + } + + static void ReplaceCsrrImm(const std::string& imm_token, + int64_t imm, + /*inout*/ std::string* str) { + size_t imm_index = str->find(imm_token); + EXPECT_NE(imm_index, std::string::npos); + if (imm_index != std::string::npos) { + str->replace(imm_index, imm_token.length(), std::to_string(imm)); + } + } + + std::map<XRegister, std::string, RISCV64CpuRegisterCompare> secondary_register_names_; + + std::unique_ptr<const Riscv64InstructionSetFeatures> instruction_set_features_; + bool use_simple_march_ = false; +}; + +TEST_F(AssemblerRISCV64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } + +TEST_F(AssemblerRISCV64Test, Lui) { + DriverStr(RepeatRIb(&Riscv64Assembler::Lui, 20, "lui {reg}, {imm}"), "Lui"); +} + +TEST_F(AssemblerRISCV64Test, Auipc) { + DriverStr(RepeatRIb(&Riscv64Assembler::Auipc, 20, "auipc {reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerRISCV64Test, Jal) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. 
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Jal, -19, 2, "jal {reg}, {imm}\n"), "Jal"); +} + +TEST_F(AssemblerRISCV64Test, Jalr) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIb(&Riscv64Assembler::Jalr, -12, "jalr {reg1}, {reg2}, {imm}\n"), "Jalr"); +} + +TEST_F(AssemblerRISCV64Test, Beq) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Beq, -11, 2, "beq {reg1}, {reg2}, {imm}\n"), "Beq"); +} + +TEST_F(AssemblerRISCV64Test, Bne) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bne, -11, 2, "bne {reg1}, {reg2}, {imm}\n"), "Bne"); +} + +TEST_F(AssemblerRISCV64Test, Blt) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Blt, -11, 2, "blt {reg1}, {reg2}, {imm}\n"), "Blt"); +} + +TEST_F(AssemblerRISCV64Test, Bge) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bge, -11, 2, "bge {reg1}, {reg2}, {imm}\n"), "Bge"); +} + +TEST_F(AssemblerRISCV64Test, Bltu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bltu, -11, 2, "bltu {reg1}, {reg2}, {imm}\n"), "Bltu"); +} + +TEST_F(AssemblerRISCV64Test, Bgeu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgeu, -11, 2, "bgeu {reg1}, {reg2}, {imm}\n"), "Bgeu"); +} + +TEST_F(AssemblerRISCV64Test, Lb) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lb, -12, "lb {reg1}, {imm}({reg2})"), "Lb"); +} + +TEST_F(AssemblerRISCV64Test, Lh) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lh, -12, "lh {reg1}, {imm}({reg2})"), "Lh"); +} + +TEST_F(AssemblerRISCV64Test, Lw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lw, -12, "lw {reg1}, {imm}({reg2})"), "Lw"); +} + +TEST_F(AssemblerRISCV64Test, Ld) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Ld, -12, "ld {reg1}, {imm}({reg2})"), "Ld"); +} + +TEST_F(AssemblerRISCV64Test, Lbu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lbu, -12, "lbu {reg1}, {imm}({reg2})"), "Lbu"); +} + +TEST_F(AssemblerRISCV64Test, Lhu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lhu, -12, "lhu {reg1}, {imm}({reg2})"), "Lhu"); +} + +TEST_F(AssemblerRISCV64Test, Lwu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lwu, -12, "lwu {reg1}, {imm}({reg2})"), "Lwu"); +} + +TEST_F(AssemblerRISCV64Test, Sb) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sb, -12, "sb {reg1}, {imm}({reg2})"), "Sb"); +} + +TEST_F(AssemblerRISCV64Test, Sh) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sh, -12, "sh {reg1}, {imm}({reg2})"), "Sh"); +} + +TEST_F(AssemblerRISCV64Test, Sw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sw, -12, "sw {reg1}, {imm}({reg2})"), "Sw"); +} + +TEST_F(AssemblerRISCV64Test, Sd) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sd, -12, "sd {reg1}, {imm}({reg2})"), "Sd"); +} + +TEST_F(AssemblerRISCV64Test, Addi) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Addi, -12, "addi {reg1}, {reg2}, {imm}"), "Addi"); +} + +TEST_F(AssemblerRISCV64Test, Slti) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slti, -12, "slti {reg1}, {reg2}, {imm}"), "Slti"); +} + +TEST_F(AssemblerRISCV64Test, Sltiu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sltiu, -12, "sltiu {reg1}, {reg2}, {imm}"), "Sltiu"); +} + +TEST_F(AssemblerRISCV64Test, Xori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Xori, 11, "xori {reg1}, {reg2}, {imm}"), 
"Xori"); +} + +TEST_F(AssemblerRISCV64Test, Ori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Ori, -12, "ori {reg1}, {reg2}, {imm}"), "Ori"); +} + +TEST_F(AssemblerRISCV64Test, Andi) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Andi, -12, "andi {reg1}, {reg2}, {imm}"), "Andi"); +} + +TEST_F(AssemblerRISCV64Test, Slli) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slli, 6, "slli {reg1}, {reg2}, {imm}"), "Slli"); +} + +TEST_F(AssemblerRISCV64Test, Srli) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srli, 6, "srli {reg1}, {reg2}, {imm}"), "Srli"); +} + +TEST_F(AssemblerRISCV64Test, Srai) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srai, 6, "srai {reg1}, {reg2}, {imm}"), "Srai"); +} + +TEST_F(AssemblerRISCV64Test, Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Add, "add {reg1}, {reg2}, {reg3}"), "Add"); +} + +TEST_F(AssemblerRISCV64Test, Sub) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sub, "sub {reg1}, {reg2}, {reg3}"), "Sub"); +} + +TEST_F(AssemblerRISCV64Test, Slt) { + DriverStr(RepeatRRR(&Riscv64Assembler::Slt, "slt {reg1}, {reg2}, {reg3}"), "Slt"); +} + +TEST_F(AssemblerRISCV64Test, Sltu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sltu, "sltu {reg1}, {reg2}, {reg3}"), "Sltu"); +} + +TEST_F(AssemblerRISCV64Test, Xor) { + DriverStr(RepeatRRR(&Riscv64Assembler::Xor, "xor {reg1}, {reg2}, {reg3}"), "Xor"); +} + +TEST_F(AssemblerRISCV64Test, Or) { + DriverStr(RepeatRRR(&Riscv64Assembler::Or, "or {reg1}, {reg2}, {reg3}"), "Or"); +} + +TEST_F(AssemblerRISCV64Test, And) { + DriverStr(RepeatRRR(&Riscv64Assembler::And, "and {reg1}, {reg2}, {reg3}"), "And"); +} + +TEST_F(AssemblerRISCV64Test, Sll) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sll, "sll {reg1}, {reg2}, {reg3}"), "Sll"); +} + +TEST_F(AssemblerRISCV64Test, Srl) { + DriverStr(RepeatRRR(&Riscv64Assembler::Srl, "srl {reg1}, {reg2}, {reg3}"), "Srl"); +} + +TEST_F(AssemblerRISCV64Test, Sra) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sra, "sra {reg1}, {reg2}, {reg3}"), "Sra"); +} + +TEST_F(AssemblerRISCV64Test, Addiw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Addiw, -12, "addiw {reg1}, {reg2}, {imm}"), "Addiw"); +} + +TEST_F(AssemblerRISCV64Test, Slliw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slliw, 5, "slliw {reg1}, {reg2}, {imm}"), "Slliw"); +} + +TEST_F(AssemblerRISCV64Test, Srliw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srliw, 5, "srliw {reg1}, {reg2}, {imm}"), "Srliw"); +} + +TEST_F(AssemblerRISCV64Test, Sraiw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sraiw, 5, "sraiw {reg1}, {reg2}, {imm}"), "Sraiw"); +} + +TEST_F(AssemblerRISCV64Test, Addw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Addw, "addw {reg1}, {reg2}, {reg3}"), "Addw"); +} + +TEST_F(AssemblerRISCV64Test, Subw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Subw, "subw {reg1}, {reg2}, {reg3}"), "Subw"); +} + +TEST_F(AssemblerRISCV64Test, Sllw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sllw, "sllw {reg1}, {reg2}, {reg3}"), "Sllw"); +} + +TEST_F(AssemblerRISCV64Test, Srlw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Srlw, "srlw {reg1}, {reg2}, {reg3}"), "Srlw"); +} + +TEST_F(AssemblerRISCV64Test, Sraw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sraw, "sraw {reg1}, {reg2}, {reg3}"), "Sraw"); +} + +TEST_F(AssemblerRISCV64Test, Ecall) { + __ Ecall(); + DriverStr("ecall\n", "Ecall"); +} + +TEST_F(AssemblerRISCV64Test, Ebreak) { + __ Ebreak(); + DriverStr("ebreak\n", "Ebreak"); +} + +TEST_F(AssemblerRISCV64Test, Fence) { + auto get_fence_type_string = [](uint32_t fence_type) { + CHECK_LE(fence_type, 0xfu); + std::string result; + if ((fence_type & kFenceInput) != 0u) { + result += 
"i"; + } + if ((fence_type & kFenceOutput) != 0u) { + result += "o"; + } + if ((fence_type & kFenceRead) != 0u) { + result += "r"; + } + if ((fence_type & kFenceWrite) != 0u) { + result += "w"; + } + if (result.empty()) { + result += "0"; + } + return result; + }; + + std::string expected; + // Note: The `pred` and `succ` are 4 bits each. + // Some combinations are not really useful but the assembler can emit them all. + for (uint32_t pred = 0u; pred != 0x10; ++pred) { + for (uint32_t succ = 0u; succ != 0x10; ++succ) { + __ Fence(pred, succ); + expected += + "fence " + get_fence_type_string(pred) + ", " + get_fence_type_string(succ) + "\n"; + } + } + DriverStr(expected, "Fence"); +} + +TEST_F(AssemblerRISCV64Test, FenceTso) { + __ FenceTso(); + DriverStr("fence.tso", "FenceTso"); +} + +TEST_F(AssemblerRISCV64Test, FenceI) { + __ FenceI(); + DriverStr("fence.i", "FenceI"); +} + +TEST_F(AssemblerRISCV64Test, Mul) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mul, "mul {reg1}, {reg2}, {reg3}"), "Mul"); +} + +TEST_F(AssemblerRISCV64Test, Mulh) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulh, "mulh {reg1}, {reg2}, {reg3}"), "Mulh"); +} + +TEST_F(AssemblerRISCV64Test, Mulhsu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulhsu, "mulhsu {reg1}, {reg2}, {reg3}"), "Mulhsu"); +} + +TEST_F(AssemblerRISCV64Test, Mulhu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulhu, "mulhu {reg1}, {reg2}, {reg3}"), "Mulhu"); +} + +TEST_F(AssemblerRISCV64Test, Div) { + DriverStr(RepeatRRR(&Riscv64Assembler::Div, "div {reg1}, {reg2}, {reg3}"), "Div"); +} + +TEST_F(AssemblerRISCV64Test, Divu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divu, "divu {reg1}, {reg2}, {reg3}"), "Divu"); +} + +TEST_F(AssemblerRISCV64Test, Rem) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rem, "rem {reg1}, {reg2}, {reg3}"), "Rem"); +} + +TEST_F(AssemblerRISCV64Test, Remu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remu, "remu {reg1}, {reg2}, {reg3}"), "Remu"); +} + +TEST_F(AssemblerRISCV64Test, Mulw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulw, "mulw {reg1}, {reg2}, {reg3}"), "Mulw"); +} + +TEST_F(AssemblerRISCV64Test, Divw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divw, "divw {reg1}, {reg2}, {reg3}"), "Divw"); +} + +TEST_F(AssemblerRISCV64Test, Divuw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divuw, "divuw {reg1}, {reg2}, {reg3}"), "Divuw"); +} + +TEST_F(AssemblerRISCV64Test, Remw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remw, "remw {reg1}, {reg2}, {reg3}"), "Remw"); +} + +TEST_F(AssemblerRISCV64Test, Remuw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remuw, "remuw {reg1}, {reg2}, {reg3}"), "Remuw"); +} + +TEST_F(AssemblerRISCV64Test, LrW) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; }; + DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrW, "lr.w{aqrl} {reg1}, ({reg2})", invalid_aqrl), + "LrW"); +} + +TEST_F(AssemblerRISCV64Test, LrD) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; }; + DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrD, "lr.d{aqrl} {reg1}, ({reg2})", invalid_aqrl), + "LrD"); +} + +TEST_F(AssemblerRISCV64Test, ScW) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; }; + DriverStr( + RepeatRRRAqRl(&Riscv64Assembler::ScW, "sc.w{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl), + "ScW"); +} + +TEST_F(AssemblerRISCV64Test, ScD) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; }; + DriverStr( + RepeatRRRAqRl(&Riscv64Assembler::ScD, "sc.d{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl), + "ScD"); +} + +TEST_F(AssemblerRISCV64Test, AmoSwapW) { + 
DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapW, "amoswap.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoSwapW"); +} + +TEST_F(AssemblerRISCV64Test, AmoSwapD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapD, "amoswap.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoSwapD"); +} + +TEST_F(AssemblerRISCV64Test, AmoAddW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddW, "amoadd.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAddW"); +} + +TEST_F(AssemblerRISCV64Test, AmoAddD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddD, "amoadd.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAddD"); +} + +TEST_F(AssemblerRISCV64Test, AmoXorW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorW, "amoxor.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoXorW"); +} + +TEST_F(AssemblerRISCV64Test, AmoXorD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorD, "amoxor.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoXorD"); +} + +TEST_F(AssemblerRISCV64Test, AmoAndW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndW, "amoand.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAndW"); +} + +TEST_F(AssemblerRISCV64Test, AmoAndD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndD, "amoand.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAndD"); +} + +TEST_F(AssemblerRISCV64Test, AmoOrW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrW, "amoor.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoOrW"); +} + +TEST_F(AssemblerRISCV64Test, AmoOrD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrD, "amoor.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoOrD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinW, "amomin.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinD, "amomin.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxW, "amomax.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxD, "amomax.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinuW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuW, "amominu.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinuW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinuD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuD, "amominu.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinuD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxuW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuW, "amomaxu.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxuW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxuD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuD, "amomaxu.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxuD"); +} + +TEST_F(AssemblerRISCV64Test, Csrrw) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrw, "csrrw {reg1}, {csr}, {reg2}"), "Csrrw"); +} + +TEST_F(AssemblerRISCV64Test, Csrrs) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrs, "csrrs {reg1}, {csr}, {reg2}"), "Csrrs"); +} + +TEST_F(AssemblerRISCV64Test, Csrrc) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrc, "csrrc {reg1}, {csr}, {reg2}"), "Csrrc"); +} + +TEST_F(AssemblerRISCV64Test, Csrrwi) { + DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrwi, "csrrwi {reg}, {csr}, {uimm}"), "Csrrwi"); +} + +TEST_F(AssemblerRISCV64Test, Csrrsi) { + DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrsi, "csrrsi {reg}, {csr}, {uimm}"), "Csrrsi"); +} + +TEST_F(AssemblerRISCV64Test, Csrrci) { + 
DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrci, "csrrci {reg}, {csr}, {uimm}"), "Csrrci"); +} + +TEST_F(AssemblerRISCV64Test, FLw) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FLw, -12, "flw {reg1}, {imm}({reg2})"), "FLw"); +} + +TEST_F(AssemblerRISCV64Test, FLd) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FLd, -12, "fld {reg1}, {imm}({reg2})"), "FLw"); +} + +TEST_F(AssemblerRISCV64Test, FSw) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FSw, 2, "fsw {reg1}, {imm}({reg2})"), "FSw"); +} + +TEST_F(AssemblerRISCV64Test, FSd) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FSd, 2, "fsd {reg1}, {imm}({reg2})"), "FSd"); +} + +TEST_F(AssemblerRISCV64Test, FMAddS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddS, + "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddS"); +} + +TEST_F(AssemblerRISCV64Test, FMAddS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddS, "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FMAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMAddD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddD, + "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddD"); +} + +TEST_F(AssemblerRISCV64Test, FMAddD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddD, "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FMAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMSubS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubS, + "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubS"); +} + +TEST_F(AssemblerRISCV64Test, FMSubS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubS, "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FMSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMSubD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubD, + "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubD"); +} + +TEST_F(AssemblerRISCV64Test, FMSubD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubD, "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FMSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubS, + "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubS"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubS, "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubD, + "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubD"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubD, "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddS, + "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddS"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddS, "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddD, + "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddD"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddD, "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FAddS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}, {rm}"), + "FAddS"); +} + +TEST_F(AssemblerRISCV64Test, FAddS_Default) { + 
DriverStr(RepeatFFF(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}"), "FAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FAddD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}, {rm}"), + "FAddD"); +} + +TEST_F(AssemblerRISCV64Test, FAddD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}"), "FAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSubS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}, {rm}"), + "FSubS"); +} + +TEST_F(AssemblerRISCV64Test, FSubS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}"), "FSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSubD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}, {rm}"), + "FSubD"); +} + +TEST_F(AssemblerRISCV64Test, FSubD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}"), "FSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMulS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}, {rm}"), + "FMulS"); +} + +TEST_F(AssemblerRISCV64Test, FMulS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}"), "FMulS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMulD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}, {rm}"), + "FMulD"); +} + +TEST_F(AssemblerRISCV64Test, FMulD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}"), "FMulD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FDivS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}, {rm}"), + "FDivS"); +} + +TEST_F(AssemblerRISCV64Test, FDivS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}"), "FDivS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FDivD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}, {rm}"), + "FDivD"); +} + +TEST_F(AssemblerRISCV64Test, FDivD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}"), "FDivD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtS) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}, {rm}"), + "FSqrtS"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtS_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}"), "FSqrtS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtD) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}, {rm}"), + "FSqrtD"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtD_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}"), "FSqrtD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjS, "fsgnj.s {reg1}, {reg2}, {reg3}"), "FSgnjS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjD, "fsgnj.d {reg1}, {reg2}, {reg3}"), "FSgnjD"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjnS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnS, "fsgnjn.s {reg1}, {reg2}, {reg3}"), "FSgnjnS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjnD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnD, "fsgnjn.d {reg1}, {reg2}, {reg3}"), "FSgnjnD"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjxS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxS, "fsgnjx.s {reg1}, {reg2}, {reg3}"), 
"FSgnjxS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjxD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxD, "fsgnjx.d {reg1}, {reg2}, {reg3}"), "FSgnjxD"); +} + +TEST_F(AssemblerRISCV64Test, FMinS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMinS, "fmin.s {reg1}, {reg2}, {reg3}"), "FMinS"); +} + +TEST_F(AssemblerRISCV64Test, FMinD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMinD, "fmin.d {reg1}, {reg2}, {reg3}"), "FMinD"); +} + +TEST_F(AssemblerRISCV64Test, FMaxS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMaxS, "fmax.s {reg1}, {reg2}, {reg3}"), "FMaxS"); +} + +TEST_F(AssemblerRISCV64Test, FMaxD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMaxD, "fmax.d {reg1}, {reg2}, {reg3}"), "FMaxD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSD) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}, {rm}"), + "FCvtSD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSD_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}"), "FCvtSD_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. +TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDS) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}, {rm}"), + "FCvtDS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDS_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}"), "FCvtDS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FEqS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FEqS, "feq.s {reg1}, {reg2}, {reg3}"), "FEqS"); +} + +TEST_F(AssemblerRISCV64Test, FEqD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FEqD, "feq.d {reg1}, {reg2}, {reg3}"), "FEqD"); +} + +TEST_F(AssemblerRISCV64Test, FLtS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLtS, "flt.s {reg1}, {reg2}, {reg3}"), "FLtS"); +} + +TEST_F(AssemblerRISCV64Test, FLtD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLtD, "flt.d {reg1}, {reg2}, {reg3}"), "FLtD"); +} + +TEST_F(AssemblerRISCV64Test, FLeS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLeS, "fle.s {reg1}, {reg2}, {reg3}"), "FLeS"); +} + +TEST_F(AssemblerRISCV64Test, FLeD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLeD, "fle.d {reg1}, {reg2}, {reg3}"), "FLeD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}, {rm}"), + "FCvtWS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}"), "FCvtWS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}, {rm}"), + "FCvtWD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}"), "FCvtWD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}, {rm}"), + "FCvtWuS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}"), "FCvtWuS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}, {rm}"), + "FCvtWuD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}"), "FCvtWuD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLS) { + 
DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}, {rm}"), + "FCvtLS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}"), "FCvtLS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}, {rm}"), + "FCvtLD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}"), "FCvtLD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}, {rm}"), + "FCvtLuS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}"), "FCvtLuS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}, {rm}"), + "FCvtLuD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}"), "FCvtLuD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSW) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}, {rm}"), + "FCvtSW"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSW_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}"), "FCvtSW_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. +TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDW) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}, {rm}"), + "FCvtDW"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDW_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}"), "FCvtDW_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSWu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}, {rm}"), + "FCvtSWu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSWu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}"), "FCvtSWu_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. 
+TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDWu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}, {rm}"), + "FCvtDWu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDWu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}"), "FCvtDWu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSL) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}, {rm}"), + "FCvtSL"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSL_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}"), "FCvtSL_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDL) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}, {rm}"), + "FCvtDL"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDL_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}"), "FCvtDL_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSLu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}, {rm}"), + "FCvtSLu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSLu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}"), "FCvtSLu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDLu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}, {rm}"), + "FCvtDLu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDLu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}"), "FCvtDLu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMvXW) { + DriverStr(RepeatRF(&Riscv64Assembler::FMvXW, "fmv.x.w {reg1}, {reg2}"), "FMvXW"); +} + +TEST_F(AssemblerRISCV64Test, FMvXD) { + DriverStr(RepeatRF(&Riscv64Assembler::FMvXD, "fmv.x.d {reg1}, {reg2}"), "FMvXD"); +} + +TEST_F(AssemblerRISCV64Test, FMvWX) { + DriverStr(RepeatFR(&Riscv64Assembler::FMvWX, "fmv.w.x {reg1}, {reg2}"), "FMvWX"); +} + +TEST_F(AssemblerRISCV64Test, FMvDX) { + DriverStr(RepeatFR(&Riscv64Assembler::FMvDX, "fmv.d.x {reg1}, {reg2}"), "FMvDX"); +} + +TEST_F(AssemblerRISCV64Test, FClassS) { + DriverStr(RepeatRF(&Riscv64Assembler::FClassS, "fclass.s {reg1}, {reg2}"), "FClassS"); +} + +TEST_F(AssemblerRISCV64Test, FClassD) { + DriverStr(RepeatrF(&Riscv64Assembler::FClassD, "fclass.d {reg1}, {reg2}"), "FClassD"); +} + +TEST_F(AssemblerRISCV64Test, AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::AddUw, "add.uw {reg1}, {reg2}, {reg3}"), "AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh1Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh1Add, "sh1add {reg1}, {reg2}, {reg3}"), "Sh1Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh1AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh1AddUw, "sh1add.uw {reg1}, {reg2}, {reg3}"), "Sh1AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh2Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh2Add, "sh2add {reg1}, {reg2}, {reg3}"), "Sh2Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh2AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh2AddUw, "sh2add.uw {reg1}, {reg2}, {reg3}"), "Sh2AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh3Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh3Add, "sh3add {reg1}, {reg2}, {reg3}"), "Sh3Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh3AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh3AddUw, "sh3add.uw {reg1}, {reg2}, {reg3}"), "Sh3AddUw"); +} + +TEST_F(AssemblerRISCV64Test, SlliUw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::SlliUw, 6, "slli.uw {reg1}, {reg2}, {imm}"), "SlliUw"); +} + +TEST_F(AssemblerRISCV64Test, Andn) { + DriverStr(RepeatRRR(&Riscv64Assembler::Andn, "andn {reg1}, 
{reg2}, {reg3}"), "Andn"); +} + +TEST_F(AssemblerRISCV64Test, Orn) { + DriverStr(RepeatRRR(&Riscv64Assembler::Orn, "orn {reg1}, {reg2}, {reg3}"), "Orn"); +} + +TEST_F(AssemblerRISCV64Test, Xnor) { + DriverStr(RepeatRRR(&Riscv64Assembler::Xnor, "xnor {reg1}, {reg2}, {reg3}"), "Xnor"); +} + +TEST_F(AssemblerRISCV64Test, Clz) { + DriverStr(RepeatRR(&Riscv64Assembler::Clz, "clz {reg1}, {reg2}"), "Clz"); +} + +TEST_F(AssemblerRISCV64Test, Clzw) { + DriverStr(RepeatRR(&Riscv64Assembler::Clzw, "clzw {reg1}, {reg2}"), "Clzw"); +} + +TEST_F(AssemblerRISCV64Test, Ctz) { + DriverStr(RepeatRR(&Riscv64Assembler::Ctz, "ctz {reg1}, {reg2}"), "Ctz"); +} + +TEST_F(AssemblerRISCV64Test, Ctzw) { + DriverStr(RepeatRR(&Riscv64Assembler::Ctzw, "ctzw {reg1}, {reg2}"), "Ctzw"); +} + +TEST_F(AssemblerRISCV64Test, Cpop) { + DriverStr(RepeatRR(&Riscv64Assembler::Cpop, "cpop {reg1}, {reg2}"), "Cpop"); +} + +TEST_F(AssemblerRISCV64Test, Cpopw) { + DriverStr(RepeatRR(&Riscv64Assembler::Cpopw, "cpopw {reg1}, {reg2}"), "Cpopw"); +} + +TEST_F(AssemblerRISCV64Test, Min) { + DriverStr(RepeatRRR(&Riscv64Assembler::Min, "min {reg1}, {reg2}, {reg3}"), "Min"); +} + +TEST_F(AssemblerRISCV64Test, Minu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Minu, "minu {reg1}, {reg2}, {reg3}"), "Minu"); +} + +TEST_F(AssemblerRISCV64Test, Max) { + DriverStr(RepeatRRR(&Riscv64Assembler::Max, "max {reg1}, {reg2}, {reg3}"), "Max"); +} + +TEST_F(AssemblerRISCV64Test, Maxu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Maxu, "maxu {reg1}, {reg2}, {reg3}"), "Maxu"); +} + +TEST_F(AssemblerRISCV64Test, Rol) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rol, "rol {reg1}, {reg2}, {reg3}"), "Rol"); +} + +TEST_F(AssemblerRISCV64Test, Rolw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rolw, "rolw {reg1}, {reg2}, {reg3}"), "Rolw"); +} + +TEST_F(AssemblerRISCV64Test, Ror) { + DriverStr(RepeatRRR(&Riscv64Assembler::Ror, "ror {reg1}, {reg2}, {reg3}"), "Ror"); +} + +TEST_F(AssemblerRISCV64Test, Rorw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rorw, "rorw {reg1}, {reg2}, {reg3}"), "Rorw"); +} + +TEST_F(AssemblerRISCV64Test, Rori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Rori, 6, "rori {reg1}, {reg2}, {imm}"), "Rori"); +} + +TEST_F(AssemblerRISCV64Test, Roriw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Roriw, 5, "roriw {reg1}, {reg2}, {imm}"), "Roriw"); +} + +TEST_F(AssemblerRISCV64Test, OrcB) { + DriverStr(RepeatRR(&Riscv64Assembler::OrcB, "orc.b {reg1}, {reg2}"), "OrcB"); +} + +TEST_F(AssemblerRISCV64Test, Rev8) { + DriverStr(RepeatRR(&Riscv64Assembler::Rev8, "rev8 {reg1}, {reg2}"), "Rev8"); +} + +// Pseudo instructions. +TEST_F(AssemblerRISCV64Test, Nop) { + __ Nop(); + DriverStr("addi zero,zero,0", "Nop"); +} + +TEST_F(AssemblerRISCV64Test, Li) { + SetUseSimpleMarch(true); + TestLoadConst64("Li", + /*can_use_tmp=*/ false, + [&](XRegister rd, int64_t value) { __ Li(rd, value); }); +} + +TEST_F(AssemblerRISCV64Test, Mv) { + DriverStr(RepeatRR(&Riscv64Assembler::Mv, "addi {reg1}, {reg2}, 0"), "Mv"); +} + +TEST_F(AssemblerRISCV64Test, Not) { + DriverStr(RepeatRR(&Riscv64Assembler::Not, "xori {reg1}, {reg2}, -1"), "Not"); +} + +TEST_F(AssemblerRISCV64Test, Neg) { + DriverStr(RepeatRR(&Riscv64Assembler::Neg, "sub {reg1}, x0, {reg2}"), "Neg"); +} + +TEST_F(AssemblerRISCV64Test, NegW) { + DriverStr(RepeatRR(&Riscv64Assembler::NegW, "subw {reg1}, x0, {reg2}"), "Neg"); +} + +TEST_F(AssemblerRISCV64Test, SextB) { + // Note: SEXT.B from the Zbb extension is not supported. 
+ DriverStr(RepeatRR(&Riscv64Assembler::SextB, + "slli {reg1}, {reg2}, 56\n" + "srai {reg1}, {reg1}, 56"), + "SextB"); +} + +TEST_F(AssemblerRISCV64Test, SextH) { + // Note: SEXT.H from the Zbb extension is not supported. + DriverStr(RepeatRR(&Riscv64Assembler::SextH, + "slli {reg1}, {reg2}, 48\n" + "srai {reg1}, {reg1}, 48"), + "SextH"); +} + +TEST_F(AssemblerRISCV64Test, SextW) { + DriverStr(RepeatRR(&Riscv64Assembler::SextW, "addiw {reg1}, {reg2}, 0\n"), "SextW"); +} + +TEST_F(AssemblerRISCV64Test, ZextB) { + DriverStr(RepeatRR(&Riscv64Assembler::ZextB, "andi {reg1}, {reg2}, 255"), "ZextB"); +} + +TEST_F(AssemblerRISCV64Test, ZextH) { + // Note: ZEXT.H from the Zbb extension is not supported. + DriverStr(RepeatRR(&Riscv64Assembler::ZextH, + "slli {reg1}, {reg2}, 48\n" + "srli {reg1}, {reg1}, 48"), + "SextH"); +} + +TEST_F(AssemblerRISCV64Test, ZextW) { + DriverStr(RepeatRR(&Riscv64Assembler::ZextW, + "slli {reg1}, {reg2}, 32\n" + "srli {reg1}, {reg1}, 32"), + "ZextW"); +} + +TEST_F(AssemblerRISCV64Test, Seqz) { + DriverStr(RepeatRR(&Riscv64Assembler::Seqz, "sltiu {reg1}, {reg2}, 1\n"), "Seqz"); +} + +TEST_F(AssemblerRISCV64Test, Snez) { + DriverStr(RepeatRR(&Riscv64Assembler::Snez, "sltu {reg1}, zero, {reg2}\n"), "Snez"); +} + +TEST_F(AssemblerRISCV64Test, Sltz) { + DriverStr(RepeatRR(&Riscv64Assembler::Sltz, "slt {reg1}, {reg2}, zero\n"), "Sltz"); +} + +TEST_F(AssemblerRISCV64Test, Sgtz) { + DriverStr(RepeatRR(&Riscv64Assembler::Sgtz, "slt {reg1}, zero, {reg2}\n"), "Sgtz"); +} + +TEST_F(AssemblerRISCV64Test, FMvS) { + DriverStr(RepeatFF(&Riscv64Assembler::FMvS, "fsgnj.s {reg1}, {reg2}, {reg2}\n"), "FMvS"); +} + +TEST_F(AssemblerRISCV64Test, FAbsS) { + DriverStr(RepeatFF(&Riscv64Assembler::FAbsS, "fsgnjx.s {reg1}, {reg2}, {reg2}\n"), "FAbsS"); +} + +TEST_F(AssemblerRISCV64Test, FNegS) { + DriverStr(RepeatFF(&Riscv64Assembler::FNegS, "fsgnjn.s {reg1}, {reg2}, {reg2}\n"), "FNegS"); +} + +TEST_F(AssemblerRISCV64Test, FMvD) { + DriverStr(RepeatFF(&Riscv64Assembler::FMvD, "fsgnj.d {reg1}, {reg2}, {reg2}\n"), "FMvD"); +} + +TEST_F(AssemblerRISCV64Test, FAbsD) { + DriverStr(RepeatFF(&Riscv64Assembler::FAbsD, "fsgnjx.d {reg1}, {reg2}, {reg2}\n"), "FAbsD"); +} + +TEST_F(AssemblerRISCV64Test, FNegD) { + DriverStr(RepeatFF(&Riscv64Assembler::FNegD, "fsgnjn.d {reg1}, {reg2}, {reg2}\n"), "FNegD"); +} + +TEST_F(AssemblerRISCV64Test, Beqz) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Beqz, -11, 2, "beq {reg}, zero, {imm}\n"), "Beqz"); +} + +TEST_F(AssemblerRISCV64Test, Bnez) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Bnez, -11, 2, "bne {reg}, zero, {imm}\n"), "Bnez"); +} + +TEST_F(AssemblerRISCV64Test, Blez) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Blez, -11, 2, "bge zero, {reg}, {imm}\n"), "Blez"); +} + +TEST_F(AssemblerRISCV64Test, Bgez) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Bgez, -11, 2, "bge {reg}, zero, {imm}\n"), "Bgez"); +} + +TEST_F(AssemblerRISCV64Test, Bltz) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Bltz, -11, 2, "blt {reg}, zero, {imm}\n"), "Bltz"); +} + +TEST_F(AssemblerRISCV64Test, Bgtz) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. 
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Bgtz, -11, 2, "blt zero, {reg}, {imm}\n"), "Bgtz"); +} + +TEST_F(AssemblerRISCV64Test, Bgt) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgt, -11, 2, "blt {reg2}, {reg1}, {imm}\n"), "Bgt"); +} + +TEST_F(AssemblerRISCV64Test, Ble) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Ble, -11, 2, "bge {reg2}, {reg1}, {imm}\n"), "Bge"); +} + +TEST_F(AssemblerRISCV64Test, Bgtu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgtu, -11, 2, "bltu {reg2}, {reg1}, {imm}\n"), "Bgtu"); +} + +TEST_F(AssemblerRISCV64Test, Bleu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bleu, -11, 2, "bgeu {reg2}, {reg1}, {imm}\n"), "Bgeu"); +} + +TEST_F(AssemblerRISCV64Test, J) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. + DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::J, -19, 2, "j {imm}\n"), "J"); +} + +TEST_F(AssemblerRISCV64Test, JalRA) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. + DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::Jal, -19, 2, "jal {imm}\n"), "JalRA"); +} + +TEST_F(AssemblerRISCV64Test, Jr) { + DriverStr(RepeatR(&Riscv64Assembler::Jr, "jr {reg}\n"), "Jr"); +} + +TEST_F(AssemblerRISCV64Test, JalrRA) { + DriverStr(RepeatR(&Riscv64Assembler::Jalr, "jalr {reg}\n"), "JalrRA"); +} + +TEST_F(AssemblerRISCV64Test, Jalr0) { + DriverStr(RepeatRR(&Riscv64Assembler::Jalr, "jalr {reg1}, {reg2}\n"), "Jalr0"); +} + +TEST_F(AssemblerRISCV64Test, Ret) { + __ Ret(); + DriverStr("ret\n", "Ret"); +} + +TEST_F(AssemblerRISCV64Test, RdCycle) { + DriverStr(RepeatR(&Riscv64Assembler::RdCycle, "rdcycle {reg}\n"), "RdCycle"); +} + +TEST_F(AssemblerRISCV64Test, RdTime) { + DriverStr(RepeatR(&Riscv64Assembler::RdTime, "rdtime {reg}\n"), "RdTime"); +} + +TEST_F(AssemblerRISCV64Test, RdInstret) { + DriverStr(RepeatR(&Riscv64Assembler::RdInstret, "rdinstret {reg}\n"), "RdInstret"); +} + +TEST_F(AssemblerRISCV64Test, Csrr) { + TestCsrrXMacro( + "Csrr", "csrr {reg}, {csr}", [&](uint32_t csr, XRegister rd) { __ Csrr(rd, csr); }); +} + +TEST_F(AssemblerRISCV64Test, Csrw) { + TestCsrrXMacro( + "Csrw", "csrw {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrw(csr, rs); }); +} + +TEST_F(AssemblerRISCV64Test, Csrs) { + TestCsrrXMacro( + "Csrs", "csrs {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrs(csr, rs); }); +} + +TEST_F(AssemblerRISCV64Test, Csrc) { + TestCsrrXMacro( + "Csrc", "csrc {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrc(csr, rs); }); +} + +TEST_F(AssemblerRISCV64Test, Csrwi) { + TestCsrrXiMacro( + "Csrwi", "csrwi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrwi(csr, uimm); }); +} + +TEST_F(AssemblerRISCV64Test, Csrsi) { + TestCsrrXiMacro( + "Csrsi", "csrsi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrsi(csr, uimm); }); +} + +TEST_F(AssemblerRISCV64Test, Csrci) { + TestCsrrXiMacro( + "Csrci", "csrci {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrci(csr, uimm); }); +} + +TEST_F(AssemblerRISCV64Test, LoadConst32) { + // `LoadConst32()` emits the same code sequences as `Li()` for 32-bit values. 
+ ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(TMP); + srs.ExcludeXRegister(TMP2); + DriverStr(RepeatRIb(&Riscv64Assembler::LoadConst32, -32, "li {reg}, {imm}"), "LoadConst32"); +} + +TEST_F(AssemblerRISCV64Test, LoadConst64) { + SetUseSimpleMarch(true); + TestLoadConst64("LoadConst64", + /*can_use_tmp=*/ true, + [&](XRegister rd, int64_t value) { __ LoadConst64(rd, value); }); +} + +TEST_F(AssemblerRISCV64Test, AddConst32) { + auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) { + __ AddConst32(rd, rs1, dchecked_integral_cast<int32_t>(value)); + }; + TestAddConst("AddConst32", 32, /*suffix=*/ "w", emit_op); +} + +TEST_F(AssemblerRISCV64Test, AddConst64) { + SetUseSimpleMarch(true); + auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) { + __ AddConst64(rd, rs1, value); + }; + TestAddConst("AddConst64", 64, /*suffix=*/ "", emit_op); +} + +TEST_F(AssemblerRISCV64Test, BcondForward3KiB) { + TestBcondForward("BcondForward3KiB", 3 * KB, "1", GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BcondBackward3KiB) { + TestBcondBackward("BcondBackward3KiB", 3 * KB, "1", GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BcondForward5KiB) { + TestBcondForward("BcondForward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BcondBackward5KiB) { + TestBcondBackward("BcondBackward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BcondForward2MiB) { + TestBcondForward("BcondForward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BcondBackward2MiB) { + TestBcondBackward("BcondBackward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Forward) { + TestBeqA0A1Forward("BeqA0A1MaxOffset13Forward", + MaxOffset13ForwardDistance() - /*BEQ*/ 4u, + "1", + GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Backward) { + TestBeqA0A1Backward("BeqA0A1MaxOffset13Forward", + MaxOffset13BackwardDistance(), + "1", + GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Forward) { + TestBeqA0A1Forward("BeqA0A1OverMaxOffset13Forward", + MaxOffset13ForwardDistance() - /*BEQ*/ 4u + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Backward) { + TestBeqA0A1Backward("BeqA0A1OverMaxOffset13Forward", + MaxOffset13BackwardDistance() + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Forward) { + TestBeqA0A1Forward("BeqA0A1MaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Backward) { + TestBeqA0A1Backward("BeqA0A1MaxOffset21Backward", + MaxOffset21BackwardDistance() - /*BNE*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Forward) { + TestBeqA0A1Forward("BeqA0A1OverMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Backward) { + TestBeqA0A1Backward("BeqA0A1OverMaxOffset21Backward", + MaxOffset21BackwardDistance() - /*BNE*/ 4u + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1AlmostCascade) { + TestBeqA0A1MaybeCascade("BeqA0A1AlmostCascade", /*cascade=*/ false, 
GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1Cascade) { + TestBeqA0A1MaybeCascade( + "BeqA0A1AlmostCascade", /*cascade=*/ true, GetPrintBcondOppositeAndJ("1")); +} + +TEST_F(AssemblerRISCV64Test, BcondElimination) { + Riscv64Label label; + __ Bind(&label); + __ Nop(); + for (XRegister reg : GetRegisters()) { + __ Bne(reg, reg, &label); + __ Blt(reg, reg, &label); + __ Bgt(reg, reg, &label); + __ Bltu(reg, reg, &label); + __ Bgtu(reg, reg, &label); + } + DriverStr("nop\n", "BcondElimination"); +} + +TEST_F(AssemblerRISCV64Test, BcondUnconditional) { + Riscv64Label label; + __ Bind(&label); + __ Nop(); + for (XRegister reg : GetRegisters()) { + __ Beq(reg, reg, &label); + __ Bge(reg, reg, &label); + __ Ble(reg, reg, &label); + __ Bleu(reg, reg, &label); + __ Bgeu(reg, reg, &label); + } + std::string expected = + "1:\n" + "nop\n" + + RepeatInsn(5u * GetRegisters().size(), "j 1b\n", []() {}); + DriverStr(expected, "BcondUnconditional"); +} + +TEST_F(AssemblerRISCV64Test, JalRdForward3KiB) { + TestJalRdForward("JalRdForward3KiB", 3 * KB, "1", GetPrintJalRd()); +} + +TEST_F(AssemblerRISCV64Test, JalRdBackward3KiB) { + TestJalRdBackward("JalRdBackward3KiB", 3 * KB, "1", GetPrintJalRd()); +} + +TEST_F(AssemblerRISCV64Test, JalRdForward2MiB) { + TestJalRdForward("JalRdForward2MiB", 2 * MB, "1", GetPrintCallRd("2")); +} + +TEST_F(AssemblerRISCV64Test, JalRdBackward2MiB) { + TestJalRdBackward("JalRdBackward2MiB", 2 * MB, "1", GetPrintCallRd("2")); +} + +TEST_F(AssemblerRISCV64Test, JForward3KiB) { + TestBuncondForward("JForward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JBackward3KiB) { + TestBuncondBackward("JBackward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JForward2MiB) { + TestBuncondForward("JForward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, JBackward2MiB) { + TestBuncondBackward("JBackward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, JMaxOffset21Forward) { + TestBuncondForward("JMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u, + "1", + GetEmitJ(), + GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JMaxOffset21Backward) { + TestBuncondBackward("JMaxOffset21Backward", + MaxOffset21BackwardDistance(), + "1", + GetEmitJ(), + GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Forward) { + TestBuncondForward("JOverMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u, + "1", + GetEmitJ(), + GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Backward) { + TestBuncondBackward("JMaxOffset21Backward", + MaxOffset21BackwardDistance() + /*Exceed max*/ 4u, + "1", + GetEmitJ(), + GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, CallForward3KiB) { + TestBuncondForward("CallForward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal()); +} + +TEST_F(AssemblerRISCV64Test, CallBackward3KiB) { + TestBuncondBackward("CallBackward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal()); +} + +TEST_F(AssemblerRISCV64Test, CallForward2MiB) { + TestBuncondForward("CallForward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, CallBackward2MiB) { + TestBuncondBackward("CallBackward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, CallMaxOffset21Forward) { + TestBuncondForward("CallMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u, + "1", + GetEmitJal(), + GetPrintJal()); +} + 
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Backward) { + TestBuncondBackward("CallMaxOffset21Backward", + MaxOffset21BackwardDistance(), + "1", + GetEmitJal(), + GetPrintJal()); +} + +TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Forward) { + TestBuncondForward("CallOverMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u, + "1", + GetEmitJal(), + GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Backward) { + TestBuncondBackward("CallMaxOffset21Backward", + MaxOffset21BackwardDistance() + /*Exceed max*/ 4u, + "1", + GetEmitJal(), + GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, Loadb) { + TestLoadStoreArbitraryOffset("Loadb", "lb", &Riscv64Assembler::Loadb, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadh) { + TestLoadStoreArbitraryOffset("Loadh", "lh", &Riscv64Assembler::Loadh, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadw) { + TestLoadStoreArbitraryOffset("Loadw", "lw", &Riscv64Assembler::Loadw, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadd) { + TestLoadStoreArbitraryOffset("Loadd", "ld", &Riscv64Assembler::Loadd, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadbu) { + TestLoadStoreArbitraryOffset("Loadbu", "lbu", &Riscv64Assembler::Loadbu, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadhu) { + TestLoadStoreArbitraryOffset("Loadhu", "lhu", &Riscv64Assembler::Loadhu, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadwu) { + TestLoadStoreArbitraryOffset("Loadwu", "lwu", &Riscv64Assembler::Loadwu, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Storeb) { + TestLoadStoreArbitraryOffset("Storeb", "sb", &Riscv64Assembler::Storeb, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Storeh) { + TestLoadStoreArbitraryOffset("Storeh", "sh", &Riscv64Assembler::Storeh, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Storew) { + TestLoadStoreArbitraryOffset("Storew", "sw", &Riscv64Assembler::Storew, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Stored) { + TestLoadStoreArbitraryOffset("Stored", "sd", &Riscv64Assembler::Stored, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, FLoadw) { + TestFPLoadStoreArbitraryOffset("FLoadw", "flw", &Riscv64Assembler::FLoadw); +} + +TEST_F(AssemblerRISCV64Test, FLoadd) { + TestFPLoadStoreArbitraryOffset("FLoadd", "fld", &Riscv64Assembler::FLoadd); +} + +TEST_F(AssemblerRISCV64Test, FStorew) { + TestFPLoadStoreArbitraryOffset("FStorew", "fsw", &Riscv64Assembler::FStorew); +} + +TEST_F(AssemblerRISCV64Test, FStored) { + TestFPLoadStoreArbitraryOffset("FStored", "fsd", &Riscv64Assembler::FStored); +} + +TEST_F(AssemblerRISCV64Test, Unimp) { + __ Unimp(); + DriverStr("unimp\n", "Unimp"); +} + +TEST_F(AssemblerRISCV64Test, LoadLabelAddress) { + std::string expected; + constexpr size_t kNumLoadsForward = 4 * KB; + constexpr size_t kNumLoadsBackward = 4 * KB; + Riscv64Label label; + auto emit_batch = [&](size_t num_loads, const std::string& target_label) { + for (size_t i = 0; i != num_loads; ++i) { + // Cycle through non-Zero registers. 
+ XRegister rd = enum_cast<XRegister>((i % (kNumberOfXRegisters - 1)) + 1); + DCHECK_NE(rd, Zero); + std::string rd_name = GetRegisterName(rd); + __ LoadLabelAddress(rd, &label); + expected += "1:\n"; + expected += ART_FORMAT("auipc {}, %pcrel_hi({})\n", rd_name, target_label); + expected += ART_FORMAT("addi {}, {}, %pcrel_lo(1b)\n", rd_name, rd_name); + } + }; + emit_batch(kNumLoadsForward, "2f"); + __ Bind(&label); + expected += "2:\n"; + emit_batch(kNumLoadsBackward, "2b"); + DriverStr(expected, "LoadLabelAddress"); +} + +TEST_F(AssemblerRISCV64Test, LoadLiteralWithPaddingForLong) { + TestLoadLiteral("LoadLiteralWithPaddingForLong", /*with_padding_for_long=*/ true); +} + +TEST_F(AssemblerRISCV64Test, LoadLiteralWithoutPaddingForLong) { + TestLoadLiteral("LoadLiteralWithoutPaddingForLong", /*with_padding_for_long=*/ false); +} + +TEST_F(AssemblerRISCV64Test, JumpTable) { + std::string expected; + expected += EmitNops(sizeof(uint32_t)); + Riscv64Label targets[4]; + uint32_t target_locations[4]; + JumpTable* jump_table = __ CreateJumpTable(ArenaVector<Riscv64Label*>( + {&targets[0], &targets[1], &targets[2], &targets[3]}, __ GetAllocator()->Adapter())); + for (size_t i : {0, 1, 2, 3}) { + target_locations[i] = __ CodeSize(); + __ Bind(&targets[i]); + expected += std::to_string(i) + ":\n"; + expected += EmitNops(sizeof(uint32_t)); + } + __ LoadLabelAddress(A0, jump_table->GetLabel()); + expected += "4:\n" + "auipc a0, %pcrel_hi(5f)\n" + "addi a0, a0, %pcrel_lo(4b)\n"; + expected += EmitNops(sizeof(uint32_t)); + uint32_t label5_location = __ CodeSize(); + auto target_offset = [&](size_t i) { + // Even with `-mno-relax`, clang assembler does not fully resolve `.4byte 0b - 5b` + // and emits a relocation, so we need to calculate target offsets ourselves. + return std::to_string(static_cast<int64_t>(target_locations[i] - label5_location)); + }; + expected += "5:\n" + ".4byte " + target_offset(0) + "\n" + ".4byte " + target_offset(1) + "\n" + ".4byte " + target_offset(2) + "\n" + ".4byte " + target_offset(3) + "\n"; + DriverStr(expected, "JumpTable"); +} + +TEST_F(AssemblerRISCV64Test, ScratchRegisters) { + ScratchRegisterScope srs(GetAssembler()); + ASSERT_EQ(2u, srs.AvailableXRegisters()); // Default: TMP(T6) and TMP2(T5). + ASSERT_EQ(1u, srs.AvailableFRegisters()); // Default: FTMP(FT11). + + XRegister tmp = srs.AllocateXRegister(); + EXPECT_EQ(TMP, tmp); + XRegister tmp2 = srs.AllocateXRegister(); + EXPECT_EQ(TMP2, tmp2); + ASSERT_EQ(0u, srs.AvailableXRegisters()); + + FRegister ftmp = srs.AllocateFRegister(); + EXPECT_EQ(FTMP, ftmp); + ASSERT_EQ(0u, srs.AvailableFRegisters()); + + // Test nesting. + srs.FreeXRegister(A0); + srs.FreeXRegister(A1); + srs.FreeFRegister(FA0); + srs.FreeFRegister(FA1); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + { + ScratchRegisterScope srs2(GetAssembler()); + ASSERT_EQ(2u, srs2.AvailableXRegisters()); + ASSERT_EQ(2u, srs2.AvailableFRegisters()); + XRegister a1 = srs2.AllocateXRegister(); + EXPECT_EQ(A1, a1); + XRegister a0 = srs2.AllocateXRegister(); + EXPECT_EQ(A0, a0); + ASSERT_EQ(0u, srs2.AvailableXRegisters()); + FRegister fa1 = srs2.AllocateFRegister(); + EXPECT_EQ(FA1, fa1); + FRegister fa0 = srs2.AllocateFRegister(); + EXPECT_EQ(FA0, fa0); + ASSERT_EQ(0u, srs2.AvailableFRegisters()); + } + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + + srs.IncludeXRegister(A0); // No-op as the register was already available. 
+ ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FA0); // No-op as the register was already available. + ASSERT_EQ(2u, srs.AvailableFRegisters()); + srs.IncludeXRegister(S0); + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FS0); + ASSERT_EQ(3u, srs.AvailableFRegisters()); + + srs.ExcludeXRegister(S1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FS1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableFRegisters()); + srs.ExcludeXRegister(A0); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FA0); + ASSERT_EQ(2u, srs.AvailableFRegisters()); +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc new file mode 100644 index 0000000000..3aeee8a154 --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc @@ -0,0 +1,633 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni_macro_assembler_riscv64.h" + +#include "base/bit_utils_iterator.h" +#include "dwarf/register.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "indirect_reference_table.h" +#include "lock_word.h" +#include "managed_register_riscv64.h" +#include "offsets.h" +#include "thread.h" + +namespace art HIDDEN { +namespace riscv64 { + +static constexpr size_t kSpillSize = 8; // Both GPRs and FPRs + +static std::pair<uint32_t, uint32_t> GetCoreAndFpSpillMasks( + ArrayRef<const ManagedRegister> callee_save_regs) { + uint32_t core_spill_mask = 0u; + uint32_t fp_spill_mask = 0u; + for (ManagedRegister r : callee_save_regs) { + Riscv64ManagedRegister reg = r.AsRiscv64(); + if (reg.IsXRegister()) { + core_spill_mask |= 1u << reg.AsXRegister(); + } else { + DCHECK(reg.IsFRegister()); + fp_spill_mask |= 1u << reg.AsFRegister(); + } + } + DCHECK_EQ(callee_save_regs.size(), + dchecked_integral_cast<size_t>(POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask))); + return {core_spill_mask, fp_spill_mask}; +} + +#define __ asm_. + +Riscv64JNIMacroAssembler::~Riscv64JNIMacroAssembler() { +} + +void Riscv64JNIMacroAssembler::FinalizeCode() { + __ FinalizeCode(); +} + +void Riscv64JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs) { + // Increase frame to required size. + DCHECK_ALIGNED(frame_size, kStackAlignment); + // Must at least have space for Method* if we're going to spill it. + DCHECK_GE(frame_size, + (callee_save_regs.size() + (method_reg.IsRegister() ? 1u : 0u)) * kSpillSize); + IncreaseFrameSize(frame_size); + + // Save callee-saves. 
+ auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs); + size_t offset = frame_size; + if ((core_spill_mask & (1u << RA)) != 0u) { + offset -= kSpillSize; + __ Stored(RA, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(RA), offset); + } + for (uint32_t reg : HighToLowBits(core_spill_mask & ~(1u << RA))) { + offset -= kSpillSize; + __ Stored(enum_cast<XRegister>(reg), SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg)), offset); + } + for (uint32_t reg : HighToLowBits(fp_spill_mask)) { + offset -= kSpillSize; + __ FStored(enum_cast<FRegister>(reg), SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg)), offset); + } + + if (method_reg.IsRegister()) { + // Write ArtMethod*. + DCHECK_EQ(A0, method_reg.AsRiscv64().AsXRegister()); + __ Stored(A0, SP, 0); + } +} + +void Riscv64JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs, + [[maybe_unused]] bool may_suspend) { + cfi().RememberState(); + + // Restore callee-saves. + auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs); + size_t offset = frame_size - callee_save_regs.size() * kSpillSize; + for (uint32_t reg : LowToHighBits(fp_spill_mask)) { + __ FLoadd(enum_cast<FRegister>(reg), SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg))); + offset += kSpillSize; + } + for (uint32_t reg : LowToHighBits(core_spill_mask & ~(1u << RA))) { + __ Loadd(enum_cast<XRegister>(reg), SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg))); + offset += kSpillSize; + } + if ((core_spill_mask & (1u << RA)) != 0u) { + __ Loadd(RA, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(RA)); + offset += kSpillSize; + } + DCHECK_EQ(offset, frame_size); + + // Decrease the frame size. + DecreaseFrameSize(frame_size); + + // Return to RA. + __ Ret(); + + // The CFI should be restored for any code that follows the exit block. 
+ __ cfi().RestoreState(); + __ cfi().DefCFAOffset(frame_size); +} + +void Riscv64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + if (adjust != 0u) { + CHECK_ALIGNED(adjust, kStackAlignment); + int64_t adjustment = dchecked_integral_cast<int64_t>(adjust); + __ AddConst64(SP, SP, -adjustment); + __ cfi().AdjustCFAOffset(adjustment); + } +} + +void Riscv64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + if (adjust != 0u) { + CHECK_ALIGNED(adjust, kStackAlignment); + int64_t adjustment = dchecked_integral_cast<int64_t>(adjust); + __ AddConst64(SP, SP, adjustment); + __ cfi().AdjustCFAOffset(-adjustment); + } +} + +ManagedRegister Riscv64JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) { + DCHECK(src.AsRiscv64().IsXRegister()); + DCHECK(size == 4u || size == 8u) << size; + return src; +} + +void Riscv64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { + Store(Riscv64ManagedRegister::FromXRegister(SP), MemberOffset(offs.Int32Value()), m_src, size); +} + +void Riscv64JNIMacroAssembler::Store(ManagedRegister m_base, + MemberOffset offs, + ManagedRegister m_src, + size_t size) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister src = m_src.AsRiscv64(); + if (src.IsXRegister()) { + if (size == 4u) { + __ Storew(src.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ Stored(src.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } + } else { + CHECK(src.IsFRegister()) << src; + if (size == 4u) { + __ FStorew(src.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ FStored(src.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } + } +} + +void Riscv64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { + Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP); + Store(sp, MemberOffset(offs.Int32Value()), m_src, static_cast<size_t>(kRiscv64PointerSize)); +} + +void Riscv64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) { + XRegister src = SP; + ScratchRegisterScope srs(&asm_); + if (tag_sp) { + XRegister tmp = srs.AllocateXRegister(); + __ Ori(tmp, SP, 0x2); + src = tmp; + } + __ Stored(src, TR, offs.Int32Value()); +} + +void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, FrameOffset offs, size_t size) { + Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP); + Load(m_dest, sp, MemberOffset(offs.Int32Value()), size); +} + +void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, + ManagedRegister m_base, + MemberOffset offs, + size_t size) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + if (dest.IsXRegister()) { + if (size == 4u) { + // The riscv64 native calling convention specifies that integers narrower than XLEN (64) + // bits are "widened according to the sign of their type up to 32 bits, then sign-extended + // to XLEN bits." The managed ABI already passes integral values this way in registers + // and correctly widened to 32 bits on the stack. The `Load()` must sign-extend narrower + // types here to pass integral values correctly to the native call. + // For `float` args, the upper 32 bits are undefined, so this is fine for them as well. 
+ __ Loadw(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ Loadd(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } + } else { + CHECK(dest.IsFRegister()) << dest; + if (size == 4u) { + __ FLoadw(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ FLoadd(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } + } +} + +void Riscv64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dest, ThreadOffset64 offs) { + Riscv64ManagedRegister tr = Riscv64ManagedRegister::FromXRegister(TR); + Load(m_dest, tr, MemberOffset(offs.Int32Value()), static_cast<size_t>(kRiscv64PointerSize)); +} + +void Riscv64JNIMacroAssembler::LoadGcRootWithoutReadBarrier(ManagedRegister m_dest, + ManagedRegister m_base, + MemberOffset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + __ Loadwu(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); +} + +void Riscv64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, + ArrayRef<ArgumentLocation> srcs, + ArrayRef<FrameOffset> refs) { + size_t arg_count = dests.size(); + DCHECK_EQ(arg_count, srcs.size()); + DCHECK_EQ(arg_count, refs.size()); + + auto get_mask = [](ManagedRegister reg) -> uint64_t { + Riscv64ManagedRegister riscv64_reg = reg.AsRiscv64(); + if (riscv64_reg.IsXRegister()) { + size_t core_reg_number = static_cast<size_t>(riscv64_reg.AsXRegister()); + DCHECK_LT(core_reg_number, 32u); + return UINT64_C(1) << core_reg_number; + } else { + DCHECK(riscv64_reg.IsFRegister()); + size_t fp_reg_number = static_cast<size_t>(riscv64_reg.AsFRegister()); + DCHECK_LT(fp_reg_number, 32u); + return (UINT64_C(1) << 32u) << fp_reg_number; + } + }; + + // Collect registers to move while storing/copying args to stack slots. + // Convert processed references to `jobject`. + uint64_t src_regs = 0u; + uint64_t dest_regs = 0u; + for (size_t i = 0; i != arg_count; ++i) { + const ArgumentLocation& src = srcs[i]; + const ArgumentLocation& dest = dests[i]; + const FrameOffset ref = refs[i]; + if (ref != kInvalidReferenceOffset) { + DCHECK_EQ(src.GetSize(), kObjectReferenceSize); + DCHECK_EQ(dest.GetSize(), static_cast<size_t>(kRiscv64PointerSize)); + } else { + DCHECK(src.GetSize() == 4u || src.GetSize() == 8u) << src.GetSize(); + DCHECK(dest.GetSize() == 4u || dest.GetSize() == 8u) << dest.GetSize(); + DCHECK_LE(src.GetSize(), dest.GetSize()); + } + if (dest.IsRegister()) { + if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) { + // No move is necessary but we may need to convert a reference to a `jobject`. + if (ref != kInvalidReferenceOffset) { + CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u); + } + } else { + if (src.IsRegister()) { + src_regs |= get_mask(src.GetRegister()); + } + dest_regs |= get_mask(dest.GetRegister()); + } + } else { + ScratchRegisterScope srs(&asm_); + Riscv64ManagedRegister reg = src.IsRegister() + ? src.GetRegister().AsRiscv64() + : Riscv64ManagedRegister::FromXRegister(srs.AllocateXRegister()); + if (!src.IsRegister()) { + if (ref != kInvalidReferenceOffset) { + // We're loading the reference only for comparison with null, so it does not matter + // if we sign- or zero-extend but let's correctly zero-extend the reference anyway. 
+ __ Loadwu(reg.AsRiscv64().AsXRegister(), SP, src.GetFrameOffset().SizeValue()); + } else { + Load(reg, src.GetFrameOffset(), src.GetSize()); + } + } + if (ref != kInvalidReferenceOffset) { + DCHECK_NE(i, 0u); + CreateJObject(reg, ref, reg, /*null_allowed=*/ true); + } + Store(dest.GetFrameOffset(), reg, dest.GetSize()); + } + } + + // Fill destination registers. + // There should be no cycles, so this simple algorithm should make progress. + while (dest_regs != 0u) { + uint64_t old_dest_regs = dest_regs; + for (size_t i = 0; i != arg_count; ++i) { + const ArgumentLocation& src = srcs[i]; + const ArgumentLocation& dest = dests[i]; + const FrameOffset ref = refs[i]; + if (!dest.IsRegister()) { + continue; // Stored in first loop above. + } + uint64_t dest_reg_mask = get_mask(dest.GetRegister()); + if ((dest_reg_mask & dest_regs) == 0u) { + continue; // Equals source, or already filled in one of previous iterations. + } + if ((dest_reg_mask & src_regs) != 0u) { + continue; // Cannot clobber this register yet. + } + if (src.IsRegister()) { + if (ref != kInvalidReferenceOffset) { + DCHECK_NE(i, 0u); // The `this` arg remains in the same register (handled above). + CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ true); + } else { + Move(dest.GetRegister(), src.GetRegister(), dest.GetSize()); + } + src_regs &= ~get_mask(src.GetRegister()); // Allow clobbering source register. + } else { + Load(dest.GetRegister(), src.GetFrameOffset(), src.GetSize()); + // No `jobject` conversion needed. There are enough arg registers in managed ABI + // to hold all references that yield a register arg `jobject` in native ABI. + DCHECK_EQ(ref, kInvalidReferenceOffset); + } + dest_regs &= ~get_mask(dest.GetRegister()); // Destination register was filled. + } + CHECK_NE(old_dest_regs, dest_regs); + DCHECK_EQ(0u, dest_regs & ~old_dest_regs); + } +} + +void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, ManagedRegister m_src, size_t size) { + // Note: This function is used only for moving between GPRs. + // FP argument registers hold the same arguments in managed and native ABIs. + DCHECK(size == 4u || size == 8u) << size; + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + Riscv64ManagedRegister src = m_src.AsRiscv64(); + DCHECK(dest.IsXRegister()); + DCHECK(src.IsXRegister()); + if (!dest.Equals(src)) { + __ Mv(dest.AsXRegister(), src.AsXRegister()); + } +} + +void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, size_t value) { + DCHECK(m_dest.AsRiscv64().IsXRegister()); + __ LoadConst64(m_dest.AsRiscv64().AsXRegister(), dchecked_integral_cast<int64_t>(value)); +} + +void Riscv64JNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { + LOG(FATAL) << "The result is already sign-extended in the native ABI."; + UNREACHABLE(); +} + +void Riscv64JNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { + LOG(FATAL) << "The result is already zero-extended in the native ABI."; + UNREACHABLE(); +} + +void Riscv64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) { + DCHECK(dest.AsRiscv64().IsXRegister()); + __ Mv(dest.AsRiscv64().AsXRegister(), TR); +} + +void Riscv64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) { + __ Stored(TR, SP, offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m_reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + // This implements the fast-path of `Thread::DecodeJObject()`. 
+ constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + DCHECK(IsInt<12>(kGlobalOrWeakGlobalMask)); + constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + DCHECK(IsInt<12>(kIndirectRefKindMask)); + XRegister reg = m_reg.AsRiscv64().AsXRegister(); + __ Beqz(reg, Riscv64JNIMacroLabel::Cast(resume)->AsRiscv64()); // Skip test and load for null. + __ Andi(TMP, reg, kGlobalOrWeakGlobalMask); + __ Bnez(TMP, Riscv64JNIMacroLabel::Cast(slow_path)->AsRiscv64()); + __ Andi(reg, reg, ~kIndirectRefKindMask); + __ Loadw(reg, reg, 0); +} + +void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister m_src, + [[maybe_unused]] bool could_be_null) { + // TODO: not validating references. +} + +void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, + [[maybe_unused]] bool could_be_null) { + // TODO: not validating references. +} + +void Riscv64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + CHECK(base.IsXRegister()) << base; + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, base.AsXRegister(), offs.Int32Value()); + __ Jr(tmp); +} + +void Riscv64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + CHECK(base.IsXRegister()) << base; + __ Loadd(RA, base.AsXRegister(), offs.Int32Value()); + __ Jalr(RA); +} + + +void Riscv64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) { + Call(Riscv64ManagedRegister::FromXRegister(TR), offset); +} + +void Riscv64JNIMacroAssembler::TryToTransitionFromRunnableToNative( + JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs) { + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock); + + DCHECK_GE(scratch_regs.size(), 2u); + XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister(); + XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister(); + + // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags. + Riscv64Label retry; + __ Bind(&retry); + static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address. + __ LrW(scratch, TR, AqRl::kNone); + __ Li(scratch2, kNativeStateValue); + // If any flags are set, go to the slow path. + static_assert(kRunnableStateValue == 0u); + __ Bnez(scratch, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + __ ScW(scratch, scratch2, TR, AqRl::kRelease); + __ Bnez(scratch, &retry); + + // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`. 
+ __ Stored(Zero, TR, thread_held_mutex_mutator_lock_offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( + JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs, + ManagedRegister return_reg) { + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock); + constexpr ThreadOffset64 thread_mutator_lock_offset = + Thread::MutatorLockOffset<kRiscv64PointerSize>(); + + DCHECK_GE(scratch_regs.size(), 2u); + DCHECK(!scratch_regs[0].AsRiscv64().Overlaps(return_reg.AsRiscv64())); + XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister(); + DCHECK(!scratch_regs[1].AsRiscv64().Overlaps(return_reg.AsRiscv64())); + XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister(); + + // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags. + Riscv64Label retry; + __ Bind(&retry); + static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address. + __ LrW(scratch, TR, AqRl::kAcquire); + __ Li(scratch2, kNativeStateValue); + // If any flags are set, or the state is not Native, go to the slow path. + // (While the thread can theoretically transition between different Suspended states, + // it would be very unexpected to see a state other than Native at this point.) + __ Bne(scratch, scratch2, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + static_assert(kRunnableStateValue == 0u); + __ ScW(scratch, Zero, TR, AqRl::kNone); + __ Bnez(scratch, &retry); + + // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock. + __ Loadd(scratch, TR, thread_mutator_lock_offset.Int32Value()); + __ Stored(scratch, TR, thread_held_mutex_mutator_lock_offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) { + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + DCHECK(IsInt<12>(dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags()))); + __ Andi(tmp, tmp, dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags())); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) { + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::DeliverPendingException() { + // Pass exception object as argument. + // Don't care about preserving A0 as this won't return. + // Note: The scratch register from `ExceptionPoll()` may have been clobbered. + __ Loadd(A0, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Loadd(RA, TR, QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, pDeliverException).Int32Value()); + __ Jalr(RA); + // Call should never return. 
+ __ Unimp(); +} + +std::unique_ptr<JNIMacroLabel> Riscv64JNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new Riscv64JNIMacroLabel()); +} + +void Riscv64JNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ J(down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); +} + +void Riscv64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) { + CHECK(label != nullptr); + + DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); + + ScratchRegisterScope srs(&asm_); + XRegister test_reg = srs.AllocateXRegister(); + int32_t is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value(); + __ Loadw(test_reg, TR, is_gc_marking_offset); + switch (cond) { + case JNIMacroUnaryCondition::kZero: + __ Beqz(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + break; + case JNIMacroUnaryCondition::kNotZero: + __ Bnez(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); + UNREACHABLE(); + } +} + +void Riscv64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref, + JNIMacroLabel* label, + JNIMacroUnaryCondition cond) { + XRegister ref = m_ref.AsRiscv64().AsXRegister(); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, ref, mirror::Object::MonitorOffset().Int32Value()); + // Move the bit we want to check to the sign bit, so that we can use BGEZ/BLTZ + // to check it. Extracting the bit for BEQZ/BNEZ would require one more instruction. + static_assert(LockWord::kMarkBitStateSize == 1u); + __ Slliw(tmp, tmp, 31 - LockWord::kMarkBitStateShift); + switch (cond) { + case JNIMacroUnaryCondition::kZero: + __ Bgez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + break; + case JNIMacroUnaryCondition::kNotZero: + __ Bltz(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); + UNREACHABLE(); + } +} + +void Riscv64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + int32_t small_offset = dchecked_integral_cast<int32_t>(address & 0xfff) - + dchecked_integral_cast<int32_t>((address & 0x800) << 1); + int64_t remainder = static_cast<int64_t>(address) - small_offset; + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ LoadConst64(tmp, remainder); + __ Lb(tmp, tmp, small_offset); + __ Bnez(tmp, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); +} + +void Riscv64JNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ Bind(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::CreateJObject(ManagedRegister m_dest, + FrameOffset spilled_reference_offset, + ManagedRegister m_ref, + bool null_allowed) { + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + Riscv64ManagedRegister ref = m_ref.AsRiscv64(); + DCHECK(dest.IsXRegister()); + DCHECK(ref.IsXRegister()); + + Riscv64Label null_label; + if (null_allowed) { + if (!dest.Equals(ref)) { + __ Li(dest.AsXRegister(), 0); + } + __ Beqz(ref.AsXRegister(), &null_label); + } + __ AddConst64(dest.AsXRegister(), SP, spilled_reference_offset.Int32Value()); + if (null_allowed) { + __ Bind(&null_label); + } +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.h 
b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h new file mode 100644 index 0000000000..79618e2c8e --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ +#define ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ + +#include <stdint.h> +#include <memory> +#include <vector> + +#include <android-base/logging.h> + +#include "assembler_riscv64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/macros.h" +#include "offsets.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" + +namespace art HIDDEN { +namespace riscv64 { + +class Riscv64JNIMacroAssembler : public JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64> { + public: + explicit Riscv64JNIMacroAssembler(ArenaAllocator* allocator) + : JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64>(allocator) {} + ~Riscv64JNIMacroAssembler(); + + // Finalize the code. + void FinalizeCode() override; + + // Emit code that will create an activation on the stack. + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs) override; + + // Emit code that will remove an activation from the stack. + void RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs, + bool may_suspend) override; + + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; + + ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override; + + // Store routines. + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; + void StoreRawPtr(FrameOffset offs, ManagedRegister src) override; + void StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) override; + + // Load routines. + void Load(ManagedRegister dest, FrameOffset offs, size_t size) override; + void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; + void LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs) override; + + // Copying routines. + void MoveArguments(ArrayRef<ArgumentLocation> dests, + ArrayRef<ArgumentLocation> srcs, + ArrayRef<FrameOffset> refs) override; + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; + void Move(ManagedRegister dest, size_t value) override; + + // Sign extension. + void SignExtend(ManagedRegister mreg, size_t size) override; + + // Zero extension. + void ZeroExtend(ManagedRegister mreg, size_t size) override; + + // Exploit fast access in managed code to Thread::Current(). 
+ void GetCurrentThread(ManagedRegister dest) override; + void GetCurrentThread(FrameOffset offset) override; + + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; + + // Jump to address held at [base+offset] (used for tail calls). + void Jump(ManagedRegister base, Offset offset) override; + + // Call to address held at [base+offset]. + void Call(ManagedRegister base, Offset offset) override; + void CallFromThread(ThreadOffset64 offset) override; + + // Generate fast-path for transition to Native. Go to `label` if any thread flag is set. + // The implementation can use `scratch_regs` which should be callee save core registers + // (already saved before this call) and must preserve all argument registers. + void TryToTransitionFromRunnableToNative(JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs) override; + + // Generate fast-path for transition to Runnable. Go to `label` if any thread flag is set. + // The implementation can use `scratch_regs` which should be core argument registers + // not used as return registers and it must preserve the `return_reg` if any. + void TryToTransitionFromNativeToRunnable(JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs, + ManagedRegister return_reg) override; + + // Generate suspend check and branch to `label` if there is a pending suspend request. + void SuspendCheck(JNIMacroLabel* label) override; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to the `label` if it is. + void ExceptionPoll(JNIMacroLabel* label) override; + // Deliver pending exception. + void DeliverPendingException() override; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() override; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) override; + // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag. + void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. + void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; + // Code at this offset will serve as the target for the Jump call. 
+ void Bind(JNIMacroLabel* label) override; + + private: + void CreateJObject(ManagedRegister m_dest, + FrameOffset spilled_reference_offset, + ManagedRegister m_ref, + bool null_allowed); + + ART_FRIEND_TEST(JniMacroAssemblerRiscv64Test, CreateJObject); +}; + +class Riscv64JNIMacroLabel final + : public JNIMacroLabelCommon<Riscv64JNIMacroLabel, + Riscv64Label, + InstructionSet::kRiscv64> { + public: + Riscv64Label* AsRiscv64() { + return AsPlatformLabel(); + } +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc new file mode 100644 index 0000000000..004ba9bb7f --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc @@ -0,0 +1,959 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <dirent.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> + +#include <fstream> +#include <map> +#include <regex> + +#include "gtest/gtest.h" + +#include "indirect_reference_table.h" +#include "lock_word.h" +#include "jni/quick/calling_convention.h" +#include "utils/riscv64/jni_macro_assembler_riscv64.h" +#include "utils/assembler_test_base.h" + +#include "base/macros.h" +#include "base/malloc_arena_pool.h" + +namespace art HIDDEN { +namespace riscv64 { + +#define __ assembler_. 
+ +class JniMacroAssemblerRiscv64Test : public AssemblerTestBase { + public: + JniMacroAssemblerRiscv64Test() : pool_(), allocator_(&pool_), assembler_(&allocator_) { } + + protected: + InstructionSet GetIsa() override { return InstructionSet::kRiscv64; } + + void DriverStr(const std::string& assembly_text, const std::string& test_name) { + assembler_.FinalizeCode(); + size_t cs = assembler_.CodeSize(); + std::vector<uint8_t> data(cs); + MemoryRegion code(&data[0], data.size()); + assembler_.CopyInstructions(code); + Driver(data, assembly_text, test_name); + } + + static Riscv64ManagedRegister AsManaged(XRegister reg) { + return Riscv64ManagedRegister::FromXRegister(reg); + } + + static Riscv64ManagedRegister AsManaged(FRegister reg) { + return Riscv64ManagedRegister::FromFRegister(reg); + } + + std::string EmitRet() { + __ RemoveFrame(/*frame_size=*/ 0u, + /*callee_save_regs=*/ ArrayRef<const ManagedRegister>(), + /*may_suspend=*/ false); + return "ret\n"; + } + + static const size_t kWordSize = 4u; + static const size_t kDoubleWordSize = 8u; + + MallocArenaPool pool_; + ArenaAllocator allocator_; + Riscv64JNIMacroAssembler assembler_; +}; + +TEST_F(JniMacroAssemblerRiscv64Test, StackFrame) { + std::string expected; + + std::unique_ptr<JniCallingConvention> jni_conv = JniCallingConvention::Create( + &allocator_, + /*is_static=*/ false, + /*is_synchronized=*/ false, + /*is_fast_native=*/ false, + /*is_critical_native=*/ false, + /*shorty=*/ "V", + InstructionSet::kRiscv64); + size_t frame_size = jni_conv->FrameSize(); + ManagedRegister method_reg = AsManaged(A0); + ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters(); + + __ BuildFrame(frame_size, method_reg, callee_save_regs); + expected += "addi sp, sp, -208\n" + "sd ra, 200(sp)\n" + "sd s11, 192(sp)\n" + "sd s10, 184(sp)\n" + "sd s9, 176(sp)\n" + "sd s8, 168(sp)\n" + "sd s7, 160(sp)\n" + "sd s6, 152(sp)\n" + "sd s5, 144(sp)\n" + "sd s4, 136(sp)\n" + "sd s3, 128(sp)\n" + "sd s2, 120(sp)\n" + "sd s0, 112(sp)\n" + "fsd fs11, 104(sp)\n" + "fsd fs10, 96(sp)\n" + "fsd fs9, 88(sp)\n" + "fsd fs8, 80(sp)\n" + "fsd fs7, 72(sp)\n" + "fsd fs6, 64(sp)\n" + "fsd fs5, 56(sp)\n" + "fsd fs4, 48(sp)\n" + "fsd fs3, 40(sp)\n" + "fsd fs2, 32(sp)\n" + "fsd fs1, 24(sp)\n" + "fsd fs0, 16(sp)\n" + "sd a0, 0(sp)\n"; + + __ RemoveFrame(frame_size, callee_save_regs, /*may_suspend=*/ false); + expected += "fld fs0, 16(sp)\n" + "fld fs1, 24(sp)\n" + "fld fs2, 32(sp)\n" + "fld fs3, 40(sp)\n" + "fld fs4, 48(sp)\n" + "fld fs5, 56(sp)\n" + "fld fs6, 64(sp)\n" + "fld fs7, 72(sp)\n" + "fld fs8, 80(sp)\n" + "fld fs9, 88(sp)\n" + "fld fs10, 96(sp)\n" + "fld fs11, 104(sp)\n" + "ld s0, 112(sp)\n" + "ld s2, 120(sp)\n" + "ld s3, 128(sp)\n" + "ld s4, 136(sp)\n" + "ld s5, 144(sp)\n" + "ld s6, 152(sp)\n" + "ld s7, 160(sp)\n" + "ld s8, 168(sp)\n" + "ld s9, 176(sp)\n" + "ld s10, 184(sp)\n" + "ld s11, 192(sp)\n" + "ld ra, 200(sp)\n" + "addi sp, sp, 208\n" + "ret\n"; + + DriverStr(expected, "StackFrame"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, ChangeFrameSize) { + std::string expected; + + __ IncreaseFrameSize(128); + expected += "addi sp, sp, -128\n"; + __ DecreaseFrameSize(128); + expected += "addi sp, sp, 128\n"; + + __ IncreaseFrameSize(0); // No-op + __ DecreaseFrameSize(0); // No-op + + __ IncreaseFrameSize(2048); + expected += "addi sp, sp, -2048\n"; + __ DecreaseFrameSize(2048); + expected += "addi t6, sp, 2047\n" + "addi sp, t6, 1\n"; + + __ IncreaseFrameSize(4096); + expected += "addi t6, sp, -2048\n" + "addi sp, t6, -2048\n"; + __ 
DecreaseFrameSize(4096); + expected += "lui t6, 1\n" + "add sp, sp, t6\n"; + + __ IncreaseFrameSize(6 * KB); + expected += "addi t6, zero, -3\n" + "slli t6, t6, 11\n" + "add sp, sp, t6\n"; + __ DecreaseFrameSize(6 * KB); + expected += "addi t6, zero, 3\n" + "slli t6, t6, 11\n" + "add sp, sp, t6\n"; + + __ IncreaseFrameSize(6 * KB + 16); + expected += "lui t6, 0xffffe\n" + "addiw t6, t6, 2048-16\n" + "add sp, sp, t6\n"; + __ DecreaseFrameSize(6 * KB + 16); + expected += "lui t6, 2\n" + "addiw t6, t6, 16-2048\n" + "add sp, sp, t6\n"; + + DriverStr(expected, "ChangeFrameSize"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Store) { + std::string expected; + + __ Store(FrameOffset(0), AsManaged(A0), kWordSize); + expected += "sw a0, 0(sp)\n"; + __ Store(FrameOffset(2048), AsManaged(S0), kDoubleWordSize); + expected += "addi t6, sp, 0x7f8\n" + "sd s0, 8(t6)\n"; + + __ Store(AsManaged(A1), MemberOffset(256), AsManaged(S2), kDoubleWordSize); + expected += "sd s2, 256(a1)\n"; + __ Store(AsManaged(S3), MemberOffset(4 * KB), AsManaged(T1), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s3\n" + "sw t1, 0(t6)\n"; + + __ Store(AsManaged(A3), MemberOffset(384), AsManaged(FA5), kDoubleWordSize); + expected += "fsd fa5, 384(a3)\n"; + __ Store(AsManaged(S4), MemberOffset(4 * KB + 16), AsManaged(FT10), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s4\n" + "fsw ft10, 16(t6)\n"; + + __ StoreRawPtr(FrameOffset(128), AsManaged(A7)); + expected += "sd a7, 128(sp)\n"; + __ StoreRawPtr(FrameOffset(6 * KB), AsManaged(S11)); + expected += "lui t6, 2\n" + "add t6, t6, sp\n" + "sd s11, -2048(t6)\n"; + + __ StoreStackPointerToThread(ThreadOffset64(512), /*tag_sp=*/ false); + expected += "sd sp, 512(s1)\n"; + __ StoreStackPointerToThread(ThreadOffset64(3 * KB), /*tag_sp=*/ true); + expected += "ori t6, sp, 0x2\n" + "addi t5, s1, 0x7f8\n" + "sd t6, 0x408(t5)\n"; + + DriverStr(expected, "Store"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Load) { + std::string expected; + + __ Load(AsManaged(A0), FrameOffset(0), kWordSize); + expected += "lw a0, 0(sp)\n"; + __ Load(AsManaged(S0), FrameOffset(2048), kDoubleWordSize); + expected += "addi t6, sp, 0x7f8\n" + "ld s0, 8(t6)\n"; + + __ Load(AsManaged(S2), AsManaged(A1), MemberOffset(256), kDoubleWordSize); + expected += "ld s2, 256(a1)\n"; + __ Load(AsManaged(T1), AsManaged(S3), MemberOffset(4 * KB), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s3\n" + "lw t1, 0(t6)\n"; + + __ Load(AsManaged(FA5), AsManaged(A3), MemberOffset(384), kDoubleWordSize); + expected += "fld fa5, 384(a3)\n"; + __ Load(AsManaged(FT10), AsManaged(S4), MemberOffset(4 * KB + 16), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s4\n" + "flw ft10, 16(t6)\n"; + + __ LoadRawPtrFromThread(AsManaged(A7), ThreadOffset64(512)); + expected += "ld a7, 512(s1)\n"; + __ LoadRawPtrFromThread(AsManaged(S11), ThreadOffset64(3 * KB)); + expected += "addi t6, s1, 0x7f8\n" + "ld s11, 0x408(t6)\n"; + + __ LoadGcRootWithoutReadBarrier(AsManaged(T0), AsManaged(A0), MemberOffset(0)); + expected += "lwu t0, 0(a0)\n"; + __ LoadGcRootWithoutReadBarrier(AsManaged(T1), AsManaged(S2), MemberOffset(0x800)); + expected += "addi t6, s2, 0x7f8\n" + "lwu t1, 8(t6)\n"; + + DriverStr(expected, "Load"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, CreateJObject) { + std::string expected; + + __ CreateJObject(AsManaged(A0), FrameOffset(8), AsManaged(A0), /*null_allowed=*/ true); + expected += "beqz a0, 1f\n" + "addi a0, sp, 8\n" + "1:\n"; + __ CreateJObject(AsManaged(A1), FrameOffset(12), AsManaged(A1), 
/*null_allowed=*/ false); + expected += "addi a1, sp, 12\n"; + __ CreateJObject(AsManaged(A2), FrameOffset(16), AsManaged(A3), /*null_allowed=*/ true); + expected += "li a2, 0\n" + "beqz a3, 2f\n" + "addi a2, sp, 16\n" + "2:\n"; + __ CreateJObject(AsManaged(A4), FrameOffset(2048), AsManaged(A5), /*null_allowed=*/ false); + expected += "addi t6, sp, 2047\n" + "addi a4, t6, 1\n"; + + DriverStr(expected, "CreateJObject"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) { + std::string expected; + + static constexpr FrameOffset kInvalidReferenceOffset = + JNIMacroAssembler<kArmPointerSize>::kInvalidReferenceOffset; + static constexpr size_t kNativePointerSize = static_cast<size_t>(kRiscv64PointerSize); + static constexpr size_t kFloatSize = 4u; + static constexpr size_t kXlenInBytes = 8u; // Used for integral args and `double`. + + // Normal or @FastNative static with parameters "LIJIJILJI". + // Note: This shall not spill references to the stack. The JNI compiler spills + // references in an separate initial pass before moving arguments and creating `jobject`s. + ArgumentLocation move_dests1[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kNativePointerSize), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + ArgumentLocation(FrameOffset(16), kXlenInBytes), + }; + ArgumentLocation move_srcs1[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + ArgumentLocation(FrameOffset(76), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + }; + FrameOffset move_refs1[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(40), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(72), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1), + ArrayRef<ArgumentLocation>(move_srcs1), + ArrayRef<FrameOffset>(move_refs1)); + expected += "beqz a7, 1f\n" + "addi a7, sp, 72\n" + "1:\n" + "sd a7, 0(sp)\n" + "ld t6, 76(sp)\n" + "sd t6, 8(sp)\n" + "lw t6, 84(sp)\n" + "sd t6, 16(sp)\n" + "mv a7, a6\n" + "mv a6, a5\n" + "mv a5, a4\n" + "mv a4, a3\n" + "mv a3, a2\n" + "li a2, 0\n" + "beqz a1, 2f\n" + "add a2, sp, 40\n" + "2:\n" + "mv a1, a0\n"; + + // Normal or @FastNative static with parameters "LIJIJILJI" - spill references. 
+ ArgumentLocation move_dests1_spill_refs[] = { + ArgumentLocation(FrameOffset(40), kVRegSize), + ArgumentLocation(FrameOffset(72), kVRegSize), + }; + ArgumentLocation move_srcs1_spill_refs[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + }; + FrameOffset move_refs1_spill_refs[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1_spill_refs), + ArrayRef<ArgumentLocation>(move_srcs1_spill_refs), + ArrayRef<FrameOffset>(move_refs1_spill_refs)); + expected += "sw a1, 40(sp)\n" + "sw a7, 72(sp)\n"; + + // Normal or @FastNative with parameters "LLIJIJIJLI" (first is `this`). + // Note: This shall not spill references to the stack. The JNI compiler spills + // references in an separate initial pass before moving arguments and creating `jobject`s. + ArgumentLocation move_dests2[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kNativePointerSize), + ArgumentLocation(FrameOffset(16), kXlenInBytes), + }; + ArgumentLocation move_srcs2[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + ArgumentLocation(FrameOffset(76), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + ArgumentLocation(FrameOffset(88), kVRegSize), + }; + FrameOffset move_refs2[] { + FrameOffset(40), + FrameOffset(44), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(84), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests2), + ArrayRef<ArgumentLocation>(move_srcs2), + ArrayRef<FrameOffset>(move_refs2)); + // Args in A1-A7 do not move but references are converted to `jobject`. + expected += "addi a1, sp, 40\n" + "beqz a2, 1f\n" + "addi a2, sp, 44\n" + "1:\n" + "ld t6, 76(sp)\n" + "sd t6, 0(sp)\n" + "lwu t6, 84(sp)\n" + "beqz t6, 2f\n" + "addi t6, sp, 84\n" + "2:\n" + "sd t6, 8(sp)\n" + "lw t6, 88(sp)\n" + "sd t6, 16(sp)\n"; + + // Normal or @FastNative static with parameters "FDFDFDFDFDIJIJIJL". 
+ ArgumentLocation move_dests3[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + ArgumentLocation(FrameOffset(16), kNativePointerSize), + }; + ArgumentLocation move_srcs3[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), 2 * kVRegSize), + ArgumentLocation(FrameOffset(88), kVRegSize), + ArgumentLocation(FrameOffset(92), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + }; + FrameOffset move_refs3[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(88), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests3), + 
ArrayRef<ArgumentLocation>(move_srcs3), + ArrayRef<FrameOffset>(move_refs3)); + // FP args in FA0-FA7 do not move. + expected += "sd a5, 0(sp)\n" + "sd a6, 8(sp)\n" + "beqz a7, 1f\n" + "addi a7, sp, 88\n" + "1:\n" + "sd a7, 16(sp)\n" + "mv a5, a2\n" + "mv a6, a3\n" + "mv a7, a4\n" + "lw a2, 88(sp)\n" + "ld a3, 92(sp)\n" + "mv a4, a1\n" + "mv a1, a0\n"; + + // @CriticalNative with parameters "DFDFDFDFIDJIJFDIIJ". + ArgumentLocation move_dests4[] = { + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + }; + ArgumentLocation move_srcs4[] = { + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(FrameOffset(92), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(FrameOffset(112), kVRegSize), + ArgumentLocation(FrameOffset(116), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize), + }; + FrameOffset move_refs4[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + 
FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests4), + ArrayRef<ArgumentLocation>(move_srcs4), + ArrayRef<FrameOffset>(move_refs4)); + // FP args in FA0-FA7 and integral args in A2-A4 do not move. + expected += "sd a6, 0(sp)\n" + "sd a7, 8(sp)\n" + "mv a0, a1\n" + "ld a1, 92(sp)\n" + "ld a6, 116(sp)\n" + "mv a7, a5\n" + "lw a5, 112(sp)\n"; + + // @CriticalNative with parameters "JIJIJIJIJI". + ArgumentLocation move_dests5[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + }; + ArgumentLocation move_srcs5[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + ArgumentLocation(FrameOffset(88), 2 * kVRegSize), + ArgumentLocation(FrameOffset(96), kVRegSize), + }; + FrameOffset move_refs5[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests5), + ArrayRef<ArgumentLocation>(move_srcs5), + ArrayRef<FrameOffset>(move_refs5)); + expected += "ld t6, 88(sp)\n" + "sd t6, 0(sp)\n" + "lw t6, 96(sp)\n" + "sd t6, 8(sp)\n" + "mv a0, a1\n" + "mv a1, a2\n" + "mv a2, a3\n" + "mv a3, a4\n" + "mv a4, a5\n" + "mv a5, a6\n" + "mv a6, a7\n" + "lw a7, 84(sp)\n"; + + DriverStr(expected, "MoveArguments"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Move) { + std::string expected; + + __ Move(AsManaged(A0), AsManaged(A1), kWordSize); + expected += "mv a0, a1\n"; + __ Move(AsManaged(A2), AsManaged(A3), kDoubleWordSize); + expected += "mv a2, a3\n"; + + __ Move(AsManaged(A4), AsManaged(A4), kWordSize); // No-op. + __ Move(AsManaged(A5), AsManaged(A5), kDoubleWordSize); // No-op. 
+ + DriverStr(expected, "Move"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, GetCurrentThread) { + std::string expected; + + __ GetCurrentThread(AsManaged(A0)); + expected += "mv a0, s1\n"; + + __ GetCurrentThread(FrameOffset(256)); + expected += "sd s1, 256(sp)\n"; + __ GetCurrentThread(FrameOffset(3 * KB)); + expected += "addi t6, sp, 0x7f8\n" + "sd s1, 0x408(t6)\n"; + + DriverStr(expected, "GetCurrentThread"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, DecodeJNITransitionOrLocalJObject) { + std::string expected; + + constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ DecodeJNITransitionOrLocalJObject(AsManaged(A0), slow_path.get(), resume.get()); + expected += "beqz a0, 1f\n" + "andi t6, a0, " + std::to_string(kGlobalOrWeakGlobalMask) + "\n" + "bnez t6, 2f\n" + "andi a0, a0, ~" + std::to_string(kIndirectRefKindMask) + "\n" + "lw a0, (a0)\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ Jump(resume.get()); + expected += "j 1b\n"; + + DriverStr(expected, "DecodeJNITransitionOrLocalJObject"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, JumpCodePointer) { + std::string expected; + + __ Jump(AsManaged(A0), Offset(24)); + expected += "ld t6, 24(a0)\n" + "jr t6\n"; + + __ Jump(AsManaged(S2), Offset(2048)); + expected += "addi t6, s2, 0x7f8\n" + "ld t6, 8(t6)\n" + "jr t6\n"; + + DriverStr(expected, "JumpCodePointer"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Call) { + std::string expected; + + __ Call(AsManaged(A0), Offset(32)); + expected += "ld ra, 32(a0)\n" + "jalr ra\n"; + + __ Call(AsManaged(S2), Offset(2048)); + expected += "addi t6, s2, 0x7f8\n" + "ld ra, 8(t6)\n" + "jalr ra\n"; + + __ CallFromThread(ThreadOffset64(256)); + expected += "ld ra, 256(s1)\n" + "jalr ra\n"; + + __ CallFromThread(ThreadOffset64(3 * KB)); + expected += "addi t6, s1, 0x7f8\n" + "ld ra, 0x408(t6)\n" + "jalr ra\n"; + + DriverStr(expected, "Call"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Transitions) { + std::string expected; + + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + static_assert(kRunnableStateValue == 0u); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + static_assert(thread_flags_offset.SizeValue() == 0u); + constexpr size_t thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock).SizeValue(); + constexpr size_t thread_mutator_lock_offset = + Thread::MutatorLockOffset<kRiscv64PointerSize>().SizeValue(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + const ManagedRegister raw_scratch_regs[] = { AsManaged(T0), AsManaged(T1) }; + const ArrayRef<const ManagedRegister> scratch_regs(raw_scratch_regs); + + __ TryToTransitionFromRunnableToNative(slow_path.get(), scratch_regs); + expected += "1:\n" + "lr.w t0, (s1)\n" + "li t1, " + std::to_string(kNativeStateValue) + "\n" + "bnez t0, 4f\n" + "sc.w.rl t0, t1, (s1)\n" + "bnez t0, 1b\n" + "addi t6, s1, 0x7f8\n" + "sd x0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + 
"(t6)\n"; + + __ TryToTransitionFromNativeToRunnable(slow_path.get(), scratch_regs, AsManaged(A0)); + expected += "2:\n" + "lr.w.aq t0, (s1)\n" + "li t1, " + std::to_string(kNativeStateValue) + "\n" + "bne t0, t1, 4f\n" + "sc.w t0, x0, (s1)\n" + "bnez t0, 2b\n" + "ld t0, " + std::to_string(thread_mutator_lock_offset) + "(s1)\n" + "addi t6, s1, 0x7f8\n" + "sd t0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + "(t6)\n"; + + __ Bind(resume.get()); + expected += "3:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "4:\n"; + + __ Jump(resume.get()); + expected += "j 3b"; + + DriverStr(expected, "SuspendCheck"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, SuspendCheck) { + std::string expected; + + ThreadOffset64 thread_flags_offet = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ SuspendCheck(slow_path.get()); + expected += "lw t6, " + std::to_string(thread_flags_offet.Int32Value()) + "(s1)\n" + "andi t6, t6, " + std::to_string(Thread::SuspendOrCheckpointRequestFlags()) + "\n" + "bnez t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ Jump(resume.get()); + expected += "j 1b"; + + DriverStr(expected, "SuspendCheck"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Exception) { + std::string expected; + + ThreadOffset64 exception_offset = Thread::ExceptionOffset<kArm64PointerSize>(); + ThreadOffset64 deliver_offset = QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + + __ ExceptionPoll(slow_path.get()); + expected += "ld t6, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n" + "bnez t6, 1f\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "1:\n"; + + __ DeliverPendingException(); + expected += "ld a0, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n" + "ld ra, " + std::to_string(deliver_offset.Int32Value()) + "(s1)\n" + "jalr ra\n" + "unimp\n"; + + DriverStr(expected, "Exception"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, JumpLabel) { + std::string expected; + + std::unique_ptr<JNIMacroLabel> target = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> back = __ CreateLabel(); + + __ Jump(target.get()); + expected += "j 2f\n"; + + __ Bind(back.get()); + expected += "1:\n"; + + __ Move(AsManaged(A0), AsManaged(A1), static_cast<size_t>(kRiscv64PointerSize)); + expected += "mv a0, a1\n"; + + __ Bind(target.get()); + expected += "2:\n"; + + __ Jump(back.get()); + expected += "j 1b\n"; + + DriverStr(expected, "JumpLabel"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, ReadBarrier) { + std::string expected; + + ThreadOffset64 is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>(); + MemberOffset monitor_offset = mirror::Object::MonitorOffset(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kNotZero); + expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n" + "bnez t6, 2f\n"; + + __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kZero); + expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n" + "beqz t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ 
Bind(slow_path.get()); + expected += "2:\n"; + + __ TestMarkBit(AsManaged(A0), resume.get(), JNIMacroUnaryCondition::kNotZero); + expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(a0)\n" + "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n" + "bltz t6, 1b\n"; + + __ TestMarkBit(AsManaged(T0), resume.get(), JNIMacroUnaryCondition::kZero); + expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(t0)\n" + "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n" + "bgez t6, 1b\n"; + + DriverStr(expected, "ReadBarrier"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, TestByteAndJumpIfNotZero) { + // Note: The `TestByteAndJumpIfNotZero()` takes the address as a `uintptr_t`. + // Use 32-bit addresses, so that we can include this test in 32-bit host tests. + + std::string expected; + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ TestByteAndJumpIfNotZero(0x12345678u, slow_path.get()); + expected += "lui t6, 0x12345\n" + "lb t6, 0x678(t6)\n" + "bnez t6, 2f\n"; + + __ TestByteAndJumpIfNotZero(0x87654321u, slow_path.get()); + expected += "lui t6, 0x87654/4\n" + "slli t6, t6, 2\n" + "lb t6, 0x321(t6)\n" + "bnez t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ TestByteAndJumpIfNotZero(0x456789abu, resume.get()); + expected += "lui t6, 0x45678+1\n" + "lb t6, 0x9ab-0x1000(t6)\n" + "bnez t6, 1b\n"; + + DriverStr(expected, "TestByteAndJumpIfNotZero"); +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/managed_register_riscv64.cc b/compiler/utils/riscv64/managed_register_riscv64.cc index 560019ae09..99bd4be784 100644 --- a/compiler/utils/riscv64/managed_register_riscv64.cc +++ b/compiler/utils/riscv64/managed_register_riscv64.cc @@ -18,7 +18,7 @@ #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { bool Riscv64ManagedRegister::Overlaps(const Riscv64ManagedRegister& other) const { diff --git a/compiler/utils/riscv64/managed_register_riscv64.h b/compiler/utils/riscv64/managed_register_riscv64.h index 8e02a9dcc8..622d766945 100644 --- a/compiler/utils/riscv64/managed_register_riscv64.h +++ b/compiler/utils/riscv64/managed_register_riscv64.h @@ -24,7 +24,7 @@ #include "base/macros.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { const int kNumberOfXRegIds = kNumberOfXRegisters; diff --git a/compiler/utils/riscv64/managed_register_riscv64_test.cc b/compiler/utils/riscv64/managed_register_riscv64_test.cc index c6ad2dc38a..d7012a796a 100644 --- a/compiler/utils/riscv64/managed_register_riscv64_test.cc +++ b/compiler/utils/riscv64/managed_register_riscv64_test.cc @@ -19,7 +19,7 @@ #include "base/globals.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { TEST(Riscv64ManagedRegister, NoRegister) { diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h index d0fff73df3..1be4532f3e 100644 --- a/compiler/utils/stack_checks.h +++ b/compiler/utils/stack_checks.h @@ -35,7 +35,7 @@ static constexpr size_t kSmallFrameSize = 1 * KB; // stack overflow check on method entry. // // A frame is considered large when it's above kLargeFrameSize. 
-static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa ATTRIBUTE_UNUSED) { +static inline bool FrameNeedsStackCheck(size_t size, [[maybe_unused]] InstructionSet isa) { return size >= kLargeFrameSize; } diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 0f7854dc5c..e6503045fa 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -955,6 +955,12 @@ class X86Assembler final : public Assembler { lock()->xaddl(address, reg); } + void rdtsc() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x31); + } + // // Misc. functionality // diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 5da6f04402..432322aea7 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -89,19 +89,7 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, addresses_.push_back(x86::Address(x86::ESP, 987654321)); } - if (registers_.size() == 0) { - registers_.insert(end(registers_), - { - new x86::Register(x86::EAX), - new x86::Register(x86::EBX), - new x86::Register(x86::ECX), - new x86::Register(x86::EDX), - new x86::Register(x86::EBP), - new x86::Register(x86::ESP), - new x86::Register(x86::ESI), - new x86::Register(x86::EDI) - }); - + if (secondary_register_names_.empty()) { secondary_register_names_.emplace(x86::Register(x86::EAX), "ax"); secondary_register_names_.emplace(x86::Register(x86::EBX), "bx"); secondary_register_names_.emplace(x86::Register(x86::ECX), "cx"); @@ -121,38 +109,28 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, tertiary_register_names_.emplace(x86::Register(x86::ESI), "dh"); tertiary_register_names_.emplace(x86::Register(x86::EDI), "bh"); } - - if (fp_registers_.size() == 0) { - fp_registers_.insert(end(fp_registers_), - { - new x86::XmmRegister(x86::XMM0), - new x86::XmmRegister(x86::XMM1), - new x86::XmmRegister(x86::XMM2), - new x86::XmmRegister(x86::XMM3), - new x86::XmmRegister(x86::XMM4), - new x86::XmmRegister(x86::XMM5), - new x86::XmmRegister(x86::XMM6), - new x86::XmmRegister(x86::XMM7) - }); - } } void TearDown() override { AssemblerTest::TearDown(); - STLDeleteElements(®isters_); - STLDeleteElements(&fp_registers_); } std::vector<x86::Address> GetAddresses() override { return addresses_; } - std::vector<x86::Register*> GetRegisters() override { - return registers_; + ArrayRef<const x86::Register> GetRegisters() override { + static constexpr x86::Register kRegisters[] = { + x86::EAX, x86::EBX, x86::ECX, x86::EDX, x86::EBP, x86::ESP, x86::ESI, x86::EDI + }; + return ArrayRef<const x86::Register>(kRegisters); } - std::vector<x86::XmmRegister*> GetFPRegisters() override { - return fp_registers_; + ArrayRef<const x86::XmmRegister> GetFPRegisters() override { + static constexpr x86::XmmRegister kFPRegisters[] = { + x86::XMM0, x86::XMM1, x86::XMM2, x86::XMM3, x86::XMM4, x86::XMM5, x86::XMM6, x86::XMM7 + }; + return ArrayRef<const x86::XmmRegister>(kFPRegisters); } x86::Immediate CreateImmediate(int64_t imm_value) override { @@ -173,10 +151,8 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, private: std::vector<x86::Address> addresses_; - std::vector<x86::Register*> registers_; std::map<x86::Register, std::string, X86RegisterCompare> secondary_register_names_; std::map<x86::Register, std::string, X86RegisterCompare> tertiary_register_names_; - std::vector<x86::XmmRegister*> fp_registers_; }; class AssemblerX86AVXTest 
: public AssemblerX86Test { @@ -267,28 +243,28 @@ TEST_F(AssemblerX86Test, RepeatAF) { TEST_F(AssemblerX86Test, PoplAllAddresses) { // Make sure all addressing modes combinations are tested at least once. std::vector<x86::Address> all_addresses; - for (x86::Register* base : GetRegisters()) { + for (x86::Register base : GetRegisters()) { // Base only. - all_addresses.push_back(x86::Address(*base, -1)); - all_addresses.push_back(x86::Address(*base, 0)); - all_addresses.push_back(x86::Address(*base, 1)); - all_addresses.push_back(x86::Address(*base, 123456789)); - for (x86::Register* index : GetRegisters()) { - if (*index == x86::ESP) { + all_addresses.push_back(x86::Address(base, -1)); + all_addresses.push_back(x86::Address(base, 0)); + all_addresses.push_back(x86::Address(base, 1)); + all_addresses.push_back(x86::Address(base, 123456789)); + for (x86::Register index : GetRegisters()) { + if (index == x86::ESP) { // Index cannot be ESP. continue; - } else if (*base == *index) { + } else if (base == index) { // Index only. - all_addresses.push_back(x86::Address(*index, TIMES_1, -1)); - all_addresses.push_back(x86::Address(*index, TIMES_2, 0)); - all_addresses.push_back(x86::Address(*index, TIMES_4, 1)); - all_addresses.push_back(x86::Address(*index, TIMES_8, 123456789)); + all_addresses.push_back(x86::Address(index, TIMES_1, -1)); + all_addresses.push_back(x86::Address(index, TIMES_2, 0)); + all_addresses.push_back(x86::Address(index, TIMES_4, 1)); + all_addresses.push_back(x86::Address(index, TIMES_8, 123456789)); } // Base and index. - all_addresses.push_back(x86::Address(*base, *index, TIMES_1, -1)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_2, 0)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_4, 1)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_8, 123456789)); + all_addresses.push_back(x86::Address(base, index, TIMES_1, -1)); + all_addresses.push_back(x86::Address(base, index, TIMES_2, 0)); + all_addresses.push_back(x86::Address(base, index, TIMES_4, 1)); + all_addresses.push_back(x86::Address(base, index, TIMES_8, 123456789)); } } DriverStr(RepeatA(&x86::X86Assembler::popl, all_addresses, "popl {mem}"), "popq"); @@ -510,11 +486,11 @@ TEST_F(AssemblerX86Test, PopcntlAddress) { // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; - std::vector<x86::Register*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86::Register> registers = assembler_test->GetRegisters(); x86::Register shifter(x86::ECX); - for (auto reg : registers) { - assembler->rorl(*reg, shifter); - str << "rorl %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorl(reg, shifter); + str << "rorl %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -530,11 +506,11 @@ TEST_F(AssemblerX86Test, RorlImm) { // Roll only allows CL as the shift count. 
std::string roll_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; - std::vector<x86::Register*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86::Register> registers = assembler_test->GetRegisters(); x86::Register shifter(x86::ECX); - for (auto reg : registers) { - assembler->roll(*reg, shifter); - str << "roll %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->roll(reg, shifter); + str << "roll %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -1379,27 +1355,27 @@ TEST_F(AssemblerX86Test, AddressDisplaceBy) { for (int32_t disp0 : displacements) { // initial displacement for (int32_t disp : displacements) { // extra displacement - for (const x86::Register *reg : GetRegisters()) { + for (x86::Register reg : GetRegisters()) { // Test non-SIB addressing. - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, disp0), disp), - x86::Address(*reg, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, disp0), disp), + x86::Address(reg, disp0 + disp)); // Test SIB addressing with EBP base. - if (*reg != x86::ESP) { + if (reg != x86::ESP) { for (ScaleFactor scale : scales) { - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, scale, disp0), disp), - x86::Address(*reg, scale, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, scale, disp0), disp), + x86::Address(reg, scale, disp0 + disp)); } } // Test SIB addressing with different base. - for (const x86::Register *index : GetRegisters()) { - if (*index == x86::ESP) { + for (x86::Register index : GetRegisters()) { + if (index == x86::ESP) { continue; // Skip ESP as it cannot be used with this address constructor. } for (ScaleFactor scale : scales) { - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, *index, scale, disp0), disp), - x86::Address(*reg, *index, scale, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, index, scale, disp0), disp), + x86::Address(reg, index, scale, disp0 + disp)); } } diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index 154e50b4e4..dfdbc183f1 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -83,7 +83,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size, void X86JNIMacroAssembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs, - bool may_suspend ATTRIBUTE_UNUSED) { + [[maybe_unused]] bool may_suspend) { CHECK_ALIGNED(frame_size, kNativeStackAlignment); cfi().RememberState(); // -kFramePointerSize for ArtMethod*. 
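[Editorial note, not part of the patch] The hunks above add a new X86Assembler::rdtsc() emitter (opcode 0F 31) without an accompanying test in this excerpt. A minimal check could follow the same GetAssembler()/DriverStr() pattern used by the surrounding assembler tests; the sketch below is illustrative only and assumes the AssemblerX86Test fixture from assembler_x86_test.cc.

// Illustrative sketch, not part of this change: exercise the new rdtsc() emitter.
TEST_F(AssemblerX86Test, Rdtsc) {
  GetAssembler()->rdtsc();        // Should emit the two-byte opcode 0F 31.
  DriverStr("rdtsc\n", "rdtsc");  // Compare against the reference assembler output.
}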
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 3fdf05bed9..e2a32a7337 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -5244,6 +5244,12 @@ void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::rdtsc() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x31); +} + void X86_64Assembler::repne_scasb() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 235ea03e2b..cb62500bc9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -964,6 +964,8 @@ class X86_64Assembler final : public Assembler { void popcntq(CpuRegister dst, CpuRegister src); void popcntq(CpuRegister dst, const Address& src); + void rdtsc(); + void rorl(CpuRegister reg, const Immediate& imm); void rorl(CpuRegister operand, CpuRegister shifter); void roll(CpuRegister reg, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index a7c206afaa..e985441101 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -199,24 +199,7 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::R15), 123456789)); } - if (registers_.size() == 0) { - registers_.push_back(new x86_64::CpuRegister(x86_64::RAX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RBX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RCX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RDX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RBP)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RSP)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RSI)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RDI)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R8)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R9)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R10)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R11)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R12)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R13)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R14)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R15)); - + if (secondary_register_names_.empty()) { secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "eax"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "ebx"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "ecx"); @@ -267,42 +250,59 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b"); quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b"); quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b"); - - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM3)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM4)); - 
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM5)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM6)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM7)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM8)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM9)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM10)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM11)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM12)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM13)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM14)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM15)); } } void TearDown() override { AssemblerTest::TearDown(); - STLDeleteElements(®isters_); - STLDeleteElements(&fp_registers_); } std::vector<x86_64::Address> GetAddresses() override { return addresses_; } - std::vector<x86_64::CpuRegister*> GetRegisters() override { - return registers_; + ArrayRef<const x86_64::CpuRegister> GetRegisters() override { + static constexpr x86_64::CpuRegister kRegisters[] = { + x86_64::CpuRegister(x86_64::RAX), + x86_64::CpuRegister(x86_64::RBX), + x86_64::CpuRegister(x86_64::RCX), + x86_64::CpuRegister(x86_64::RDX), + x86_64::CpuRegister(x86_64::RBP), + x86_64::CpuRegister(x86_64::RSP), + x86_64::CpuRegister(x86_64::RSI), + x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::R8), + x86_64::CpuRegister(x86_64::R9), + x86_64::CpuRegister(x86_64::R10), + x86_64::CpuRegister(x86_64::R11), + x86_64::CpuRegister(x86_64::R12), + x86_64::CpuRegister(x86_64::R13), + x86_64::CpuRegister(x86_64::R14), + x86_64::CpuRegister(x86_64::R15), + }; + return ArrayRef<const x86_64::CpuRegister>(kRegisters); } - std::vector<x86_64::XmmRegister*> GetFPRegisters() override { - return fp_registers_; + ArrayRef<const x86_64::XmmRegister> GetFPRegisters() override { + static constexpr x86_64::XmmRegister kFPRegisters[] = { + x86_64::XmmRegister(x86_64::XMM0), + x86_64::XmmRegister(x86_64::XMM1), + x86_64::XmmRegister(x86_64::XMM2), + x86_64::XmmRegister(x86_64::XMM3), + x86_64::XmmRegister(x86_64::XMM4), + x86_64::XmmRegister(x86_64::XMM5), + x86_64::XmmRegister(x86_64::XMM6), + x86_64::XmmRegister(x86_64::XMM7), + x86_64::XmmRegister(x86_64::XMM8), + x86_64::XmmRegister(x86_64::XMM9), + x86_64::XmmRegister(x86_64::XMM10), + x86_64::XmmRegister(x86_64::XMM11), + x86_64::XmmRegister(x86_64::XMM12), + x86_64::XmmRegister(x86_64::XMM13), + x86_64::XmmRegister(x86_64::XMM14), + x86_64::XmmRegister(x86_64::XMM15), + }; + return ArrayRef<const x86_64::XmmRegister>(kFPRegisters); } x86_64::Immediate CreateImmediate(int64_t imm_value) override { @@ -328,11 +328,9 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, private: std::vector<x86_64::Address> addresses_; - std::vector<x86_64::CpuRegister*> registers_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_; - std::vector<x86_64::XmmRegister*> fp_registers_; }; class AssemblerX86_64AVXTest : public AssemblerX86_64Test { @@ -515,28 +513,28 @@ TEST_F(AssemblerX86_64Test, Toolchain) { TEST_F(AssemblerX86_64Test, PopqAllAddresses) { // Make sure all addressing modes combinations are tested at least once. 
std::vector<x86_64::Address> all_addresses; - for (x86_64::CpuRegister* base : GetRegisters()) { + for (const x86_64::CpuRegister& base : GetRegisters()) { // Base only. - all_addresses.push_back(x86_64::Address(*base, -1)); - all_addresses.push_back(x86_64::Address(*base, 0)); - all_addresses.push_back(x86_64::Address(*base, 1)); - all_addresses.push_back(x86_64::Address(*base, 123456789)); - for (x86_64::CpuRegister* index : GetRegisters()) { - if (index->AsRegister() == x86_64::RSP) { + all_addresses.push_back(x86_64::Address(base, -1)); + all_addresses.push_back(x86_64::Address(base, 0)); + all_addresses.push_back(x86_64::Address(base, 1)); + all_addresses.push_back(x86_64::Address(base, 123456789)); + for (const x86_64::CpuRegister& index : GetRegisters()) { + if (index.AsRegister() == x86_64::RSP) { // Index cannot be RSP. continue; - } else if (base->AsRegister() == index->AsRegister()) { + } else if (base.AsRegister() == index.AsRegister()) { // Index only. - all_addresses.push_back(x86_64::Address(*index, TIMES_1, -1)); - all_addresses.push_back(x86_64::Address(*index, TIMES_2, 0)); - all_addresses.push_back(x86_64::Address(*index, TIMES_4, 1)); - all_addresses.push_back(x86_64::Address(*index, TIMES_8, 123456789)); + all_addresses.push_back(x86_64::Address(index, TIMES_1, -1)); + all_addresses.push_back(x86_64::Address(index, TIMES_2, 0)); + all_addresses.push_back(x86_64::Address(index, TIMES_4, 1)); + all_addresses.push_back(x86_64::Address(index, TIMES_8, 123456789)); } // Base and index. - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_1, -1)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_2, 0)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_4, 1)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_8, 123456789)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_1, -1)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_2, 0)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_4, 1)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_8, 123456789)); } } DriverStr(RepeatA(&x86_64::X86_64Assembler::popq, all_addresses, "popq {mem}"), "popq"); @@ -641,11 +639,11 @@ TEST_F(AssemblerX86_64Test, SublImm) { // Shll only allows CL as the shift count. std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shll(*reg, shifter); - str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shll(reg, shifter); + str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -662,11 +660,11 @@ TEST_F(AssemblerX86_64Test, ShllImm) { // Shlq only allows CL as the shift count. 
std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shlq(*reg, shifter); - str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shlq(reg, shifter); + str << "shlq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -683,11 +681,11 @@ TEST_F(AssemblerX86_64Test, ShlqImm) { // Shrl only allows CL as the shift count. std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shrl(*reg, shifter); - str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shrl(reg, shifter); + str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -703,11 +701,11 @@ TEST_F(AssemblerX86_64Test, ShrlImm) { // Shrq only allows CL as the shift count. std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shrq(*reg, shifter); - str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shrq(reg, shifter); + str << "shrq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -723,11 +721,11 @@ TEST_F(AssemblerX86_64Test, ShrqImm) { // Sarl only allows CL as the shift count. std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->sarl(*reg, shifter); - str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->sarl(reg, shifter); + str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -743,11 +741,11 @@ TEST_F(AssemblerX86_64Test, SarlImm) { // Sarq only allows CL as the shift count. 
std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->sarq(*reg, shifter); - str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->sarq(reg, shifter); + str << "sarq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -763,11 +761,11 @@ TEST_F(AssemblerX86_64Test, SarqImm) { // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rorl(*reg, shifter); - str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorl(reg, shifter); + str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -783,11 +781,11 @@ TEST_F(AssemblerX86_64Test, RorlImm) { // Roll only allows CL as the shift count. std::string roll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->roll(*reg, shifter); - str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->roll(reg, shifter); + str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -803,11 +801,11 @@ TEST_F(AssemblerX86_64Test, RollImm) { // Rorq only allows CL as the shift count. std::string rorq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rorq(*reg, shifter); - str << "rorq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorq(reg, shifter); + str << "rorq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -823,11 +821,11 @@ TEST_F(AssemblerX86_64Test, RorqImm) { // Rolq only allows CL as the shift count. 
std::string rolq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rolq(*reg, shifter); - str << "rolq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rolq(reg, shifter); + str << "rolq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -2135,7 +2133,7 @@ TEST_F(AssemblerX86_64Test, Psrldq) { "psrldq $2, %xmm15\n", "psrldqi"); } -std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string x87_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; @@ -2202,7 +2200,7 @@ TEST_F(AssemblerX86_64Test, RetImm) { "ret ${imm}", /*non-negative*/ true), "ret"); } -std::string ret_and_leave_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string ret_and_leave_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; @@ -2375,13 +2373,13 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, std::string suffixes[15] = { "o", "no", "b", "ae", "e", "ne", "be", "a", "s", "ns", "pe", "po", "l", "ge", "le" }; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); std::ostringstream str; - for (auto reg : registers) { + for (auto&& reg : registers) { for (size_t i = 0; i < 15; ++i) { - assembler->setcc(static_cast<x86_64::Condition>(i), *reg); - str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n"; + assembler->setcc(static_cast<x86_64::Condition>(i), reg); + str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(reg) << "\n"; } } @@ -2459,27 +2457,27 @@ TEST_F(AssemblerX86_64Test, AddressDisplaceBy) { for (int32_t disp0 : displacements) { // initial displacement for (int32_t disp : displacements) { // extra displacement - for (const x86_64::CpuRegister* reg : GetRegisters()) { + for (const x86_64::CpuRegister reg : GetRegisters()) { // Test non-SIB addressing. - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, disp0), disp), - x86_64::Address(*reg, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, disp0), disp), + x86_64::Address(reg, disp0 + disp)); // Test SIB addressing with RBP base. - if (reg->AsRegister() != x86_64::RSP) { + if (reg.AsRegister() != x86_64::RSP) { for (ScaleFactor scale : scales) { - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, scale, disp0), disp), - x86_64::Address(*reg, scale, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, scale, disp0), disp), + x86_64::Address(reg, scale, disp0 + disp)); } } // Test SIB addressing with different base. - for (const x86_64::CpuRegister* index : GetRegisters()) { - if (index->AsRegister() == x86_64::RSP) { + for (const x86_64::CpuRegister& index : GetRegisters()) { + if (index.AsRegister() == x86_64::RSP) { continue; // Skip RSP as it cannot be used with this address constructor. 
} for (ScaleFactor scale : scales) { - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, *index, scale, disp0), disp), - x86_64::Address(*reg, *index, scale, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, index, scale, disp0), disp), + x86_64::Address(reg, index, scale, disp0 + disp)); } } @@ -2513,7 +2511,7 @@ static x86_64::X86_64ManagedRegister ManagedFromFpu(x86_64::FloatRegister r) { return x86_64::X86_64ManagedRegister::FromXmmRegister(r); } -std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string buildframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. @@ -2556,7 +2554,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) { DriverFn(&buildframe_test_fn, "BuildFrame"); } -std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string removeframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. @@ -2588,7 +2586,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) { } std::string increaseframe_test_fn( - JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { assembler->IncreaseFrameSize(0U); assembler->IncreaseFrameSize(kStackAlignment); @@ -2608,7 +2606,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) { } std::string decreaseframe_test_fn( - JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { assembler->DecreaseFrameSize(0U); assembler->DecreaseFrameSize(kStackAlignment); diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 388845730e..e9e6dbdae7 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -95,7 +95,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size, void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs, - bool may_suspend ATTRIBUTE_UNUSED) { + [[maybe_unused]] bool may_suspend) { CHECK_ALIGNED(frame_size, kNativeStackAlignment); cfi().RememberState(); int gpr_count = 0; @@ -515,7 +515,7 @@ void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) { } void X86_64JNIMacroAssembler::TryToTransitionFromRunnableToNative( - JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { + JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kX86_64PointerSize>(); |
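[Editorial note, not part of the patch] A recurring edit in these hunks replaces ART's ATTRIBUTE_UNUSED macro, written after the parameter name, with the standard C++17 [[maybe_unused]] attribute written before the declaration. The following self-contained sketch illustrates the attribute with a hypothetical function rather than ART code.

#include <cstdio>

// Hypothetical example, not ART code: the parameter is only read in debug
// builds, so [[maybe_unused]] suppresses -Wunused-parameter in release builds
// without needing a project-specific macro.
void LogIfDebug([[maybe_unused]] const char* msg) {
#ifndef NDEBUG
  std::fprintf(stderr, "%s\n", msg);
#endif
}

int main() {
  LogIfDebug("hello");  // Compiles cleanly whether or not NDEBUG is defined.
  return 0;
}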