Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp | 256
-rw-r--r--  compiler/art_standalone_compiler_tests.xml | 1
-rw-r--r--  compiler/cfi_test.h | 2
-rw-r--r--  compiler/common_compiler_test.cc | 14
-rw-r--r--  compiler/compiler.h | 12
-rw-r--r--  compiler/compiler_reflection_test.cc | 1
-rw-r--r--  compiler/debug/elf_debug_frame_writer.h | 21
-rw-r--r--  compiler/debug/elf_debug_line_writer.h | 2
-rw-r--r--  compiler/debug/elf_debug_writer.cc | 55
-rw-r--r--  compiler/driver/compiler_options.cc | 5
-rw-r--r--  compiler/driver/compiler_options.h | 6
-rw-r--r--  compiler/driver/compiler_options_map-inl.h | 2
-rw-r--r--  compiler/exception_test.cc | 24
-rw-r--r--  compiler/jit/jit_logger.h | 2
-rw-r--r--  compiler/jni/jni_cfi_test.cc | 2
-rw-r--r--  compiler/jni/jni_compiler_test.cc | 29
-rw-r--r--  compiler/jni/quick/arm64/calling_convention_arm64.cc | 2
-rw-r--r--  compiler/jni/quick/calling_convention.cc | 35
-rw-r--r--  compiler/jni/quick/calling_convention.h | 14
-rw-r--r--  compiler/jni/quick/jni_compiler.cc | 12
-rw-r--r--  compiler/jni/quick/riscv64/calling_convention_riscv64.cc | 429
-rw-r--r--  compiler/jni/quick/riscv64/calling_convention_riscv64.h | 92
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.cc | 2
-rw-r--r--  compiler/linker/output_stream_test.cc | 6
-rw-r--r--  compiler/optimizing/block_builder.cc | 16
-rw-r--r--  compiler/optimizing/block_builder.h | 5
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 13
-rw-r--r--  compiler/optimizing/code_generation_data.cc | 57
-rw-r--r--  compiler/optimizing/code_generation_data.h | 123
-rw-r--r--  compiler/optimizing/code_generator.cc | 162
-rw-r--r--  compiler/optimizing/code_generator.h | 53
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 240
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 154
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 166
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 2
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc | 6494
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h | 881
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_neon.cc | 30
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_sve.cc | 133
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc | 24
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc | 24
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc | 24
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 171
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 4
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 171
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 14
-rw-r--r--  compiler/optimizing/code_sinking.cc | 64
-rw-r--r--  compiler/optimizing/codegen_test.cc | 18
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 40
-rw-r--r--  compiler/optimizing/common_arm64.h | 6
-rw-r--r--  compiler/optimizing/constant_folding.cc | 90
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.cc | 4
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 14
-rw-r--r--  compiler/optimizing/graph_checker.cc | 154
-rw-r--r--  compiler/optimizing/graph_checker.h | 25
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 1
-rw-r--r--  compiler/optimizing/inliner.cc | 8
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 9
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 213
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h | 2
-rw-r--r--  compiler/optimizing/intrinsics.cc | 6
-rw-r--r--  compiler/optimizing/intrinsics.h | 20
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 24
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 9
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 10
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h | 9
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc | 354
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.h | 84
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 17
-rw-r--r--  compiler/optimizing/intrinsics_x86.h | 9
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 66
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h | 9
-rw-r--r--  compiler/optimizing/jit_patches_arm64.cc | 104
-rw-r--r--  compiler/optimizing/jit_patches_arm64.h | 107
-rw-r--r--  compiler/optimizing/linearize_test.cc | 7
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 6
-rw-r--r--  compiler/optimizing/liveness_test.cc | 12
-rw-r--r--  compiler/optimizing/load_store_analysis.cc | 30
-rw-r--r--  compiler/optimizing/load_store_analysis.h | 2
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc | 20
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 9
-rw-r--r--  compiler/optimizing/load_store_elimination_test.cc | 163
-rw-r--r--  compiler/optimizing/locations.cc | 4
-rw-r--r--  compiler/optimizing/loop_analysis.cc | 4
-rw-r--r--  compiler/optimizing/loop_analysis.h | 16
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 857
-rw-r--r--  compiler/optimizing/loop_optimization.h | 210
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc | 10
-rw-r--r--  compiler/optimizing/nodes.cc | 110
-rw-r--r--  compiler/optimizing/nodes.h | 360
-rw-r--r--  compiler/optimizing/nodes_shared.h | 10
-rw-r--r--  compiler/optimizing/nodes_vector.h | 87
-rw-r--r--  compiler/optimizing/nodes_x86.h | 12
-rw-r--r--  compiler/optimizing/optimization.cc | 4
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 22
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 365
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 6
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 10
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 19
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc | 4
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 29
-rw-r--r--  compiler/optimizing/register_allocator.cc | 5
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 2086
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 195
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 3
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 3
-rw-r--r--  compiler/optimizing/scheduler.cc | 4
-rw-r--r--  compiler/optimizing/scheduler_arm.cc | 16
-rw-r--r--  compiler/optimizing/scheduler_arm.h | 2
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 47
-rw-r--r--  compiler/optimizing/scheduler_arm64.h | 2
-rw-r--r--  compiler/optimizing/select_generator.cc | 3
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 16
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc | 9
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 2
-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 18
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 8
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc | 2
-rw-r--r--  compiler/trampolines/trampoline_compiler.cc | 58
-rw-r--r--  compiler/trampolines/trampoline_compiler.h | 2
-rw-r--r--  compiler/utils/arm/assembler_arm_vixl.cc | 2
-rw-r--r--  compiler/utils/arm/assembler_arm_vixl.h | 30
-rw-r--r--  compiler/utils/arm/jni_macro_assembler_arm_vixl.cc | 18
-rw-r--r--  compiler/utils/arm64/assembler_arm64.cc | 2
-rw-r--r--  compiler/utils/arm64/assembler_arm64.h | 6
-rw-r--r--  compiler/utils/arm64/jni_macro_assembler_arm64.cc | 6
-rw-r--r--  compiler/utils/assembler.cc | 17
-rw-r--r--  compiler/utils/assembler.h | 18
-rw-r--r--  compiler/utils/assembler_test.h | 645
-rw-r--r--  compiler/utils/assembler_test_base.h | 19
-rw-r--r--  compiler/utils/assembler_thumb_test.cc | 2
-rw-r--r--  compiler/utils/jni_macro_assembler.cc | 25
-rw-r--r--  compiler/utils/jni_macro_assembler.h | 13
-rw-r--r--  compiler/utils/jni_macro_assembler_test.h | 5
-rw-r--r--  compiler/utils/label.h | 6
-rw-r--r--  compiler/utils/riscv64/assembler_riscv64.cc | 2422
-rw-r--r--  compiler/utils/riscv64/assembler_riscv64.h | 1178
-rw-r--r--  compiler/utils/riscv64/assembler_riscv64_test.cc | 2939
-rw-r--r--  compiler/utils/riscv64/jni_macro_assembler_riscv64.cc | 633
-rw-r--r--  compiler/utils/riscv64/jni_macro_assembler_riscv64.h | 166
-rw-r--r--  compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc | 959
-rw-r--r--  compiler/utils/riscv64/managed_register_riscv64.cc | 2
-rw-r--r--  compiler/utils/riscv64/managed_register_riscv64.h | 2
-rw-r--r--  compiler/utils/riscv64/managed_register_riscv64_test.cc | 2
-rw-r--r--  compiler/utils/stack_checks.h | 2
-rw-r--r--  compiler/utils/x86/assembler_x86.h | 6
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc | 114
-rw-r--r--  compiler/utils/x86/jni_macro_assembler_x86.cc | 2
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc | 6
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h | 2
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc | 236
-rw-r--r--  compiler/utils/x86_64/jni_macro_assembler_x86_64.cc | 4
152 files changed, 21036 insertions, 4785 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index a879bd8f06..6472613cfe 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -28,6 +28,81 @@ package {
default_applicable_licenses: ["art_license"],
}
+// Common dependencies for libart-compiler_deps and libartd-compiler_deps.
+cc_defaults {
+ name: "libart-compiler_common_deps",
+ shared_libs: [
+ "libbase",
+ "liblzma", // libelffile(d) dependency; must be repeated here since it's a static lib.
+ "libartpalette",
+ ],
+ header_libs: [
+ "libart_generated_headers",
+ ],
+}
+
+// Dependencies of libart-compiler, used to propagate libart-compiler deps when static linking.
+art_cc_defaults {
+ name: "libart-compiler_deps",
+ defaults: ["libart-compiler_common_deps"],
+ shared_libs: [
+ "libartbase",
+ "libprofile",
+ "libdexfile",
+ ],
+ static_libs: ["libelffile"],
+ codegen: {
+ arm: {
+ // VIXL assembly support for ARM targets.
+ static_libs: [
+ "libvixl",
+ ],
+ },
+ arm64: {
+ // VIXL assembly support for ARM64 targets.
+ static_libs: [
+ "libvixl",
+ ],
+ },
+ },
+ runtime_libs: [
+ // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load
+ // `libart-disassembler.so`.
+ "libart-disassembler",
+ ],
+}
+
+// Dependencies of libartd-compiler, used to propagate libartd-compiler deps when static linking.
+art_cc_defaults {
+ name: "libartd-compiler_deps",
+ defaults: ["libart-compiler_common_deps"],
+ shared_libs: [
+ "libartbased",
+ "libprofiled",
+ "libdexfiled",
+ ],
+ static_libs: ["libelffiled"],
+ codegen: {
+ arm: {
+ // VIXL assembly support for ARM targets.
+ static_libs: [
+ "libvixld",
+ ],
+ },
+ arm64: {
+ // VIXL assembly support for ARM64 targets.
+ static_libs: [
+ "libvixld",
+ ],
+ },
+ },
+ runtime_libs: [
+ // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load
+ // `libartd-disassembler.so`.
+ "libartd-disassembler",
+ ],
+}
+
art_cc_defaults {
name: "libart-compiler-defaults",
defaults: ["art_defaults"],
@@ -46,6 +121,7 @@ art_cc_defaults {
"optimizing/bounds_check_elimination.cc",
"optimizing/builder.cc",
"optimizing/cha_guard_optimization.cc",
+ "optimizing/code_generation_data.cc",
"optimizing/code_generator.cc",
"optimizing/code_generator_utils.cc",
"optimizing/code_sinking.cc",
@@ -80,7 +156,6 @@ art_cc_defaults {
"optimizing/reference_type_propagation.cc",
"optimizing/register_allocation_resolver.cc",
"optimizing/register_allocator.cc",
- "optimizing/register_allocator_graph_color.cc",
"optimizing/register_allocator_linear_scan.cc",
"optimizing/select_generator.cc",
"optimizing/scheduler.cc",
@@ -122,6 +197,7 @@ art_cc_defaults {
"optimizing/code_generator_arm64.cc",
"optimizing/code_generator_vector_arm64_neon.cc",
"optimizing/code_generator_vector_arm64_sve.cc",
+ "optimizing/jit_patches_arm64.cc",
"optimizing/scheduler_arm64.cc",
"optimizing/instruction_simplifier_arm64.cc",
"optimizing/intrinsics_arm64.cc",
@@ -132,6 +208,11 @@ art_cc_defaults {
},
riscv64: {
srcs: [
+ "jni/quick/riscv64/calling_convention_riscv64.cc",
+ "optimizing/code_generator_riscv64.cc",
+ "optimizing/intrinsics_riscv64.cc",
+ "utils/riscv64/assembler_riscv64.cc",
+ "utils/riscv64/jni_macro_assembler_riscv64.cc",
"utils/riscv64/managed_register_riscv64.cc",
],
},
@@ -210,35 +291,13 @@ art_cc_library {
"libart-compiler-defaults",
"dex2oat-pgo-defaults",
"art_hugepage_defaults",
+ "libart-compiler_deps",
],
- codegen: {
- arm: {
- // VIXL assembly support for ARM targets.
- static_libs: [
- "libvixl",
- ],
- },
- arm64: {
- // VIXL assembly support for ARM64 targets.
- static_libs: [
- "libvixl",
- ],
- },
- },
shared_libs: [
+ // libart is not included in libart-compiler_deps to allow libart-compiler(-for-test)
+ // to select a suitable libart library (either with or without LTO).
"libart",
- "libartbase",
- "libartpalette",
- "libprofile",
- "libdexfile",
],
- static_libs: ["libelffile"],
- runtime_libs: [
- // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load
- // `libart-disassembler.so`.
- "libart-disassembler",
- ],
-
target: {
android: {
lto: {
@@ -253,6 +312,22 @@ art_cc_library {
],
}
+// For static linking with gtests. Same as `libart-compiler`, but without LTO.
+// When gtests statically link a library with LTO enabled, they are also built with LTO.
+// This makes the build process use a lot of memory. b/277207452
+art_cc_library_static {
+ name: "libart-compiler-for-test",
+ defaults: [
+ "libart-compiler-defaults",
+ "dex2oat-pgo-defaults",
+ "art_hugepage_defaults",
+ "libart-compiler_deps",
+ ],
+ header_libs: [
+ "libart_headers",
+ ],
+}
+
cc_defaults {
name: "libart-compiler_static_defaults",
defaults: [
@@ -266,48 +341,32 @@ cc_defaults {
whole_static_libs: ["libart-compiler"],
}
+// libart-compiler_static_defaults for standalone gtests.
+// Uses libart-for-test_static_defaults instead of libart_static_defaults.
+// Uses libart-compiler-for-test instead of libart-compiler.
+cc_defaults {
+ name: "libart-compiler-for-test_static_defaults",
+ defaults: [
+ "libart-compiler_static_base_defaults",
+ "libart-disassembler_static_defaults",
+ "libart-for-test_static_defaults",
+ "libartbase_static_defaults",
+ "libdexfile_static_defaults",
+ "libprofile_static_defaults",
+ ],
+ whole_static_libs: ["libart-compiler-for-test"],
+}
+
art_cc_library {
name: "libartd-compiler",
defaults: [
"art_debug_defaults",
"libart-compiler-defaults",
+ "libartd-compiler_deps",
],
- codegen: {
- arm: {
- // VIXL assembly support for ARM targets.
- static_libs: [
- "libvixld",
- ],
- // Export vixl headers as they are included in this library's headers used by tests.
- export_static_lib_headers: [
- "libvixld",
- ],
- },
- arm64: {
- // VIXL assembly support for ARM64 targets.
- static_libs: [
- "libvixld",
- ],
- // Export vixl headers as they are included in this library's headers used by tests.
- export_static_lib_headers: [
- "libvixld",
- ],
- },
- },
shared_libs: [
- "libartbased",
"libartd",
- "libartpalette",
- "libprofiled",
- "libdexfiled",
],
- static_libs: ["libelffiled"],
- runtime_libs: [
- // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load
- // `libartd-disassembler.so`.
- "libartd-disassembler",
- ],
-
apex_available: [
"com.android.art.debug",
// TODO(b/183882457): This lib doesn't go into com.android.art, but
@@ -330,42 +389,47 @@ cc_defaults {
whole_static_libs: ["libartd-compiler"],
}
+// libartd-compiler_static_defaults for gtests.
+// Uses libartd-for-test_static_defaults instead of libart_static_defaults.
+// Note that `libartd-compiler-for-test` is not required here, because `libartd-compiler`
+// doesn't use LTO.
+cc_defaults {
+ name: "libartd-compiler-for-test_static_defaults",
+ defaults: [
+ "libart-compiler_static_base_defaults",
+ "libartbased_static_defaults",
+ "libartd-disassembler_static_defaults",
+ "libartd-for-test_static_defaults",
+ "libdexfiled_static_defaults",
+ "libprofiled_static_defaults",
+ ],
+ whole_static_libs: ["libartd-compiler"],
+}
+
// Properties common to `libart-compiler-gtest` and `libartd-compiler-gtest`.
art_cc_defaults {
name: "libart-compiler-gtest-common",
srcs: [
"common_compiler_test.cc",
],
- shared_libs: [
- "libbase",
- ],
}
-art_cc_library {
+art_cc_library_static {
name: "libart-compiler-gtest",
defaults: [
"libart-gtest-defaults",
"libart-compiler-gtest-common",
- ],
- shared_libs: [
- "libart-compiler",
- "libart-disassembler",
- "libartbase-art-gtest",
- "libart-runtime-gtest",
+ "libart-compiler-for-test_static_defaults",
],
}
-art_cc_library {
+art_cc_library_static {
name: "libartd-compiler-gtest",
defaults: [
- "libartd-gtest-defaults",
+ "art_debug_defaults",
+ "libart-gtest-defaults",
"libart-compiler-gtest-common",
- ],
- shared_libs: [
- "libartd-compiler",
- "libartd-disassembler",
- "libartbased-art-gtest",
- "libartd-runtime-gtest",
+ "libartd-compiler-for-test_static_defaults",
],
}
@@ -474,11 +538,6 @@ art_cc_defaults {
"libnativehelper_header_only",
],
- shared_libs: [
- "libnativeloader",
- "libunwindstack",
- ],
-
target: {
host: {
shared_libs: [
@@ -496,15 +555,8 @@ art_cc_test {
"art_gtest_defaults",
"art_compiler_tests_defaults",
],
- shared_libs: [
- "libprofiled",
- "libartd-simulator-container",
- "liblzma",
- ],
static_libs: [
- "libartd-compiler",
- "libelffiled",
- "libvixld",
+ "libartd-simulator-container",
],
}
@@ -516,21 +568,8 @@ art_cc_test {
"art_compiler_tests_defaults",
],
data: [":generate-boot-image"],
- shared_libs: [
- "libprofile",
- "liblzma",
- "libartpalette",
- ],
static_libs: [
- // For now, link `libart-simulator-container` statically for simplicity,
- // to save the added complexity to package it in test suites (along with
- // other test artifacts) and install it on device during tests.
- // TODO(b/192070541): Consider linking `libart-simulator-container`
- // dynamically.
"libart-simulator-container",
- "libart-compiler",
- "libelffile",
- "libvixl",
],
test_config: "art_standalone_compiler_tests.xml",
}
@@ -551,6 +590,12 @@ art_cc_test {
"utils/assembler_thumb_test.cc",
],
},
+ riscv64: {
+ srcs: [
+ "utils/riscv64/assembler_riscv64_test.cc",
+ "utils/riscv64/jni_macro_assembler_riscv64_test.cc",
+ ],
+ },
x86: {
srcs: [
"utils/x86/assembler_x86_test.cc",
@@ -562,12 +607,7 @@ art_cc_test {
],
},
},
- shared_libs: [
- "liblzma",
- ],
static_libs: [
- "libartd-compiler",
- "libelffiled",
"libvixld",
],
}
diff --git a/compiler/art_standalone_compiler_tests.xml b/compiler/art_standalone_compiler_tests.xml
index 394ac8d4fb..8e8636cca4 100644
--- a/compiler/art_standalone_compiler_tests.xml
+++ b/compiler/art_standalone_compiler_tests.xml
@@ -15,6 +15,7 @@
-->
<configuration description="Runs art_standalone_compiler_tests.">
<option name="config-descriptor:metadata" key="mainline-param" value="com.google.android.art.apex" />
+ <option name="config-descriptor:metadata" key="mainline-param" value="com.android.art.apex" />
<target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
<option name="cleanup" value="true" />
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index e65bee8e2e..6835e92dfd 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -131,7 +131,7 @@ class CFITest : public dwarf::DwarfTest {
}
// Use the .cfi_ prefix.
new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_"));
- output->push_back(address + ": " + new_line);
+ output->push_back(ART_FORMAT("{}: {}", address, new_line));
}
}
}
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 442b96e5fa..a37f516759 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -133,9 +133,9 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo
CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set,
ArrayRef<const uint8_t> code,
ArrayRef<const uint8_t> stack_map,
- ArrayRef<const uint8_t> cfi ATTRIBUTE_UNUSED,
+ [[maybe_unused]] ArrayRef<const uint8_t> cfi,
ArrayRef<const linker::LinkerPatch> patches,
- bool is_intrinsic ATTRIBUTE_UNUSED) override {
+ [[maybe_unused]] bool is_intrinsic) override {
// Supports only one method at a time.
CHECK_EQ(instruction_set_, InstructionSet::kNone);
CHECK_NE(instruction_set, InstructionSet::kNone);
@@ -150,15 +150,15 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo
return reinterpret_cast<CompiledMethod*>(this);
}
- ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
- /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) override {
+ ArrayRef<const uint8_t> GetThunkCode([[maybe_unused]] const linker::LinkerPatch& patch,
+ [[maybe_unused]] /*out*/ std::string* debug_name) override {
LOG(FATAL) << "Unsupported.";
UNREACHABLE();
}
- void SetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
- ArrayRef<const uint8_t> code ATTRIBUTE_UNUSED,
- const std::string& debug_name ATTRIBUTE_UNUSED) override {
+ void SetThunkCode([[maybe_unused]] const linker::LinkerPatch& patch,
+ [[maybe_unused]] ArrayRef<const uint8_t> code,
+ [[maybe_unused]] const std::string& debug_name) override {
LOG(FATAL) << "Unsupported.";
UNREACHABLE();
}
diff --git a/compiler/compiler.h b/compiler/compiler.h
index ce785bb769..6c317f7e02 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -73,12 +73,12 @@ class Compiler {
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const = 0;
- virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
- jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
- jit::JitMemoryRegion* region ATTRIBUTE_UNUSED,
- ArtMethod* method ATTRIBUTE_UNUSED,
- CompilationKind compilation_kind ATTRIBUTE_UNUSED,
- jit::JitLogger* jit_logger ATTRIBUTE_UNUSED)
+ virtual bool JitCompile([[maybe_unused]] Thread* self,
+ [[maybe_unused]] jit::JitCodeCache* code_cache,
+ [[maybe_unused]] jit::JitMemoryRegion* region,
+ [[maybe_unused]] ArtMethod* method,
+ [[maybe_unused]] CompilationKind compilation_kind,
+ [[maybe_unused]] jit::JitLogger* jit_logger)
REQUIRES_SHARED(Locks::mutator_lock_) {
return false;
}
diff --git a/compiler/compiler_reflection_test.cc b/compiler/compiler_reflection_test.cc
index f3c07db136..d8e2b9e5b9 100644
--- a/compiler/compiler_reflection_test.cc
+++ b/compiler/compiler_reflection_test.cc
@@ -29,6 +29,7 @@ namespace art HIDDEN {
class CompilerReflectionTest : public CommonCompilerTest {};
TEST_F(CompilerReflectionTest, StaticMainMethod) {
+ TEST_DISABLED_FOR_RISCV64();
ScopedObjectAccess soa(Thread::Current());
jobject jclass_loader = LoadDex("Main");
StackHandleScope<1> hs(soa.Self());
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
index 6b72262e26..fe98a578b1 100644
--- a/compiler/debug/elf_debug_frame_writer.h
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -90,7 +90,26 @@ static void WriteCIE(InstructionSet isa, /*inout*/ std::vector<uint8_t>* buffer)
return;
}
case InstructionSet::kRiscv64: {
- UNIMPLEMENTED(FATAL);
+ dwarf::DebugFrameOpCodeWriter<> opcodes;
+ opcodes.DefCFA(Reg::Riscv64Core(2), 0); // X2(SP).
+ // core registers.
+ for (int reg = 3; reg < 32; reg++) { // Skip X0 (Zero), X1 (RA) and X2 (SP).
+ if ((reg >= 5 && reg < 8) || (reg >= 10 && reg < 18) || reg >= 28) {
+ opcodes.Undefined(Reg::Riscv64Core(reg));
+ } else {
+ opcodes.SameValue(Reg::Riscv64Core(reg));
+ }
+ }
+ // fp registers.
+ for (int reg = 0; reg < 32; reg++) {
+ if (reg < 8 || (reg >= 10 && reg < 18) || reg >= 28) {
+ opcodes.Undefined(Reg::Riscv64Fp(reg));
+ } else {
+ opcodes.SameValue(Reg::Riscv64Fp(reg));
+ }
+ }
+ auto return_reg = Reg::Riscv64Core(1); // X1(RA).
+ WriteCIE(is64bit, return_reg, opcodes, buffer);
return;
}
case InstructionSet::kX86: {
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index 4896bc1e9b..5d654e3e06 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -194,7 +194,7 @@ class ElfDebugLineWriter {
} else {
directory_index = it->second;
}
- full_path = package_name + "/" + file_name;
+ full_path = ART_FORMAT("{}/{}", package_name, file_name);
}
// Add file entry.
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 8f64d73aa7..505b6c5d8a 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -113,7 +113,7 @@ void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
template <typename ElfTypes>
static std::vector<uint8_t> MakeMiniDebugInfoInternal(
InstructionSet isa,
- const InstructionSetFeatures* features ATTRIBUTE_UNUSED,
+ [[maybe_unused]] const InstructionSetFeatures* features,
typename ElfTypes::Addr text_section_address,
size_t text_section_size,
typename ElfTypes::Addr dex_section_address,
@@ -172,11 +172,10 @@ std::vector<uint8_t> MakeMiniDebugInfo(
}
}
-std::vector<uint8_t> MakeElfFileForJIT(
- InstructionSet isa,
- const InstructionSetFeatures* features ATTRIBUTE_UNUSED,
- bool mini_debug_info,
- const MethodDebugInfo& method_info) {
+std::vector<uint8_t> MakeElfFileForJIT(InstructionSet isa,
+ [[maybe_unused]] const InstructionSetFeatures* features,
+ bool mini_debug_info,
+ const MethodDebugInfo& method_info) {
using ElfTypes = ElfRuntimeTypes;
CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa)));
CHECK_EQ(method_info.is_code_address_text_relative, false);
@@ -213,13 +212,12 @@ std::vector<uint8_t> MakeElfFileForJIT(
DCHECK_EQ(sym.st_size, method_info.code_size);
num_syms++;
});
- reader.VisitDebugFrame([&](const Reader::CIE* cie ATTRIBUTE_UNUSED) {
- num_cies++;
- }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) {
- DCHECK_EQ(fde->sym_addr, method_info.code_address);
- DCHECK_EQ(fde->sym_size, method_info.code_size);
- num_fdes++;
- });
+ reader.VisitDebugFrame([&]([[maybe_unused]] const Reader::CIE* cie) { num_cies++; },
+ [&](const Reader::FDE* fde, [[maybe_unused]] const Reader::CIE* cie) {
+ DCHECK_EQ(fde->sym_addr, method_info.code_address);
+ DCHECK_EQ(fde->sym_size, method_info.code_size);
+ num_fdes++;
+ });
DCHECK_EQ(num_syms, 1u);
DCHECK_LE(num_cies, 1u);
DCHECK_LE(num_fdes, 1u);
@@ -302,18 +300,20 @@ std::vector<uint8_t> PackElfFileForJIT(
// ART always produces the same CIE, so we copy the first one and ignore the rest.
bool copied_cie = false;
for (Reader& reader : readers) {
- reader.VisitDebugFrame([&](const Reader::CIE* cie) {
- if (!copied_cie) {
- debug_frame->WriteFully(cie->data(), cie->size());
- copied_cie = true;
- }
- }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) {
- DCHECK(copied_cie);
- DCHECK_EQ(fde->cie_pointer, 0);
- if (!is_removed_symbol(fde->sym_addr)) {
- debug_frame->WriteFully(fde->data(), fde->size());
- }
- });
+ reader.VisitDebugFrame(
+ [&](const Reader::CIE* cie) {
+ if (!copied_cie) {
+ debug_frame->WriteFully(cie->data(), cie->size());
+ copied_cie = true;
+ }
+ },
+ [&](const Reader::FDE* fde, [[maybe_unused]] const Reader::CIE* cie) {
+ DCHECK(copied_cie);
+ DCHECK_EQ(fde->cie_pointer, 0);
+ if (!is_removed_symbol(fde->sym_addr)) {
+ debug_frame->WriteFully(fde->data(), fde->size());
+ }
+ });
}
debug_frame->End();
@@ -348,9 +348,8 @@ std::vector<uint8_t> PackElfFileForJIT(
std::vector<uint8_t> WriteDebugElfFileForClasses(
InstructionSet isa,
- const InstructionSetFeatures* features ATTRIBUTE_UNUSED,
- const ArrayRef<mirror::Class*>& types)
- REQUIRES_SHARED(Locks::mutator_lock_) {
+ [[maybe_unused]] const InstructionSetFeatures* features,
+ const ArrayRef<mirror::Class*>& types) REQUIRES_SHARED(Locks::mutator_lock_) {
using ElfTypes = ElfRuntimeTypes;
CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa)));
std::vector<uint8_t> buffer;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 603596f3bc..d0770e952b 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -57,7 +57,7 @@ CompilerOptions::CompilerOptions()
generate_debug_info_(kDefaultGenerateDebugInfo),
generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
generate_build_id_(false),
- implicit_null_checks_(true),
+ implicit_null_checks_(false),
implicit_so_checks_(true),
implicit_suspend_checks_(false),
compile_pic_(false),
@@ -121,7 +121,8 @@ bool CompilerOptions::ParseRegisterAllocationStrategy(const std::string& option,
if (option == "linear-scan") {
register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
} else if (option == "graph-color") {
- register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor;
+ LOG(ERROR) << "Graph coloring allocator has been removed, using linear scan instead.";
+ register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
} else {
*error_msg = "Unrecognized register allocation strategy. Try linear-scan, or graph-color.";
return false;
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index c8a41ce24b..a5b3ae17d0 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -42,6 +42,7 @@ class VerifierDepsTest;
namespace linker {
class Arm64RelativePatcherTest;
+class Thumb2RelativePatcherTest;
} // namespace linker
class ArtMethod;
@@ -115,9 +116,7 @@ class CompilerOptions final {
}
bool IsAnyCompilationEnabled() const {
- return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_) &&
- // TODO(riscv64): remove this when we have compiler support for RISC-V
- GetInstructionSet() != InstructionSet::kRiscv64;
+ return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_);
}
size_t GetHugeMethodThreshold() const {
@@ -504,6 +503,7 @@ class CompilerOptions final {
friend class jit::JitCompiler;
friend class verifier::VerifierDepsTest;
friend class linker::Arm64RelativePatcherTest;
+ friend class linker::Thumb2RelativePatcherTest;
template <class Base>
friend bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string* error_msg);
diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h
index 79a59625f5..8530df37e4 100644
--- a/compiler/driver/compiler_options_map-inl.h
+++ b/compiler/driver/compiler_options_map-inl.h
@@ -117,7 +117,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string
#pragma GCC diagnostic ignored "-Wframe-larger-than="
template <typename Map, typename Builder>
-inline void AddCompilerOptionsArgumentParserOptions(Builder& b) {
+NO_INLINE void AddCompilerOptionsArgumentParserOptions(Builder& b) {
// clang-format off
b.
Define("--compiler-filter=_")
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index 82c4998217..75ade55799 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -69,9 +69,10 @@ class ExceptionTest : public CommonRuntimeTest {
dex_ = my_klass_->GetDexCache()->GetDexFile();
+ std::vector<uint8_t> fake_code;
uint32_t code_size = 12;
for (size_t i = 0 ; i < code_size; i++) {
- fake_code_.push_back(0x70 | i);
+ fake_code.push_back(0x70 | i);
}
const uint32_t native_pc_offset = 4u;
@@ -96,16 +97,23 @@ class ExceptionTest : public CommonRuntimeTest {
const size_t header_size = sizeof(OatQuickMethodHeader);
const size_t code_alignment = GetInstructionSetCodeAlignment(kRuntimeISA);
- fake_header_code_and_maps_.resize(stack_maps_size + header_size + code_size + code_alignment);
- // NB: The start of the vector might not have been allocated the desired alignment.
+ fake_header_code_and_maps_size_ = stack_maps_size + header_size + code_size + code_alignment;
+ // Use mmap to make sure we get untagged memory here. Real code gets allocated using
+ // mspace_memalign which is never tagged.
+ fake_header_code_and_maps_ = static_cast<uint8_t*>(mmap(nullptr,
+ fake_header_code_and_maps_size_,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1,
+ 0));
uint8_t* code_ptr =
AlignUp(&fake_header_code_and_maps_[stack_maps_size + header_size], code_alignment);
memcpy(&fake_header_code_and_maps_[0], stack_map.data(), stack_maps_size);
- OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_.data());
+ OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_);
static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy");
memcpy(code_ptr - header_size, &method_header, header_size);
- memcpy(code_ptr, fake_code_.data(), fake_code_.size());
+ memcpy(code_ptr, fake_code.data(), fake_code.size());
if (kRuntimeISA == InstructionSet::kArm) {
// Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer().
@@ -123,10 +131,12 @@ class ExceptionTest : public CommonRuntimeTest {
method_g_->SetEntryPointFromQuickCompiledCode(code_ptr);
}
+ void TearDown() override { munmap(fake_header_code_and_maps_, fake_header_code_and_maps_size_); }
+
const DexFile* dex_;
- std::vector<uint8_t> fake_code_;
- std::vector<uint8_t> fake_header_code_and_maps_;
+ size_t fake_header_code_and_maps_size_;
+ uint8_t* fake_header_code_and_maps_;
ArtMethod* method_f_;
ArtMethod* method_g_;
diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h
index 9d1f3073fa..79f47f817f 100644
--- a/compiler/jit/jit_logger.h
+++ b/compiler/jit/jit_logger.h
@@ -53,7 +53,7 @@ namespace jit {
//
// Command line Example:
// $ perf record -k mono dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test
-// $ perf inject -i perf.data -o perf.data.jitted
+// $ perf inject -j -i perf.data -o perf.data.jitted
// $ perf report -i perf.data.jitted
// $ perf annotate -i perf.data.jitted
// NOTE:
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 70cf2d4eb0..ae5f2d0aa9 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -99,7 +99,7 @@ class JNICFITest : public CFITest {
jni_asm->FinalizeCode();
std::vector<uint8_t> actual_asm(jni_asm->CodeSize());
MemoryRegion code(&actual_asm[0], actual_asm.size());
- jni_asm->FinalizeInstructions(code);
+ jni_asm->CopyInstructions(code);
ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size);
const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data());
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 397db251b8..40989b2999 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -175,9 +175,8 @@ size_t count_nonnull_refs_single_helper(T arg,
// SFINAE for non-ref-types. Always 0.
template <typename T>
-size_t count_nonnull_refs_single_helper(T arg ATTRIBUTE_UNUSED,
- typename std::enable_if<!jni_type_traits<T>::is_ref>::type*
- = nullptr) {
+size_t count_nonnull_refs_single_helper(
+ [[maybe_unused]] T arg, typename std::enable_if<!jni_type_traits<T>::is_ref>::type* = nullptr) {
return 0;
}
@@ -591,10 +590,9 @@ struct ScopedCheckHandleScope {
class CountReferencesVisitor : public RootVisitor {
public:
- void VisitRoots(mirror::Object*** roots ATTRIBUTE_UNUSED,
+ void VisitRoots([[maybe_unused]] mirror::Object*** roots,
size_t count,
- const RootInfo& info) override
- REQUIRES_SHARED(Locks::mutator_lock_) {
+ const RootInfo& info) override REQUIRES_SHARED(Locks::mutator_lock_) {
if (info.GetType() == art::RootType::kRootJavaFrame) {
const JavaFrameRootInfo& jrfi = static_cast<const JavaFrameRootInfo&>(info);
if (jrfi.GetVReg() == JavaFrameRootInfo::kNativeReferenceArgument) {
@@ -604,10 +602,9 @@ class CountReferencesVisitor : public RootVisitor {
}
}
- void VisitRoots(mirror::CompressedReference<mirror::Object>** roots ATTRIBUTE_UNUSED,
- size_t count ATTRIBUTE_UNUSED,
- const RootInfo& info) override
- REQUIRES_SHARED(Locks::mutator_lock_) {
+ void VisitRoots([[maybe_unused]] mirror::CompressedReference<mirror::Object>** roots,
+ [[maybe_unused]] size_t count,
+ const RootInfo& info) override REQUIRES_SHARED(Locks::mutator_lock_) {
CHECK_NE(info.GetType(), art::RootType::kRootJavaFrame);
}
@@ -980,8 +977,8 @@ void JniCompilerTest::CompileAndRunIntObjectObjectMethodImpl() {
JNI_TEST(CompileAndRunIntObjectObjectMethod)
int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {};
-jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED,
- jclass klass ATTRIBUTE_UNUSED,
+jint Java_MyClassNatives_fooSII([[maybe_unused]] JNIEnv* env,
+ [[maybe_unused]] jclass klass,
jint x,
jint y) {
gJava_MyClassNatives_fooSII_calls[gCurrentJni]++;
@@ -1003,8 +1000,8 @@ void JniCompilerTest::CompileAndRunStaticIntIntMethodImpl() {
JNI_TEST_CRITICAL(CompileAndRunStaticIntIntMethod)
int gJava_MyClassNatives_fooSDD_calls[kJniKindCount] = {};
-jdouble Java_MyClassNatives_fooSDD(JNIEnv* env ATTRIBUTE_UNUSED,
- jclass klass ATTRIBUTE_UNUSED,
+jdouble Java_MyClassNatives_fooSDD([[maybe_unused]] JNIEnv* env,
+ [[maybe_unused]] jclass klass,
jdouble x,
jdouble y) {
gJava_MyClassNatives_fooSDD_calls[gCurrentJni]++;
@@ -1676,8 +1673,8 @@ void JniCompilerTest::CompileAndRunFloatFloatMethodImpl() {
JNI_TEST(CompileAndRunFloatFloatMethod)
-void Java_MyClassNatives_checkParameterAlign(JNIEnv* env ATTRIBUTE_UNUSED,
- jobject thisObj ATTRIBUTE_UNUSED,
+void Java_MyClassNatives_checkParameterAlign([[maybe_unused]] JNIEnv* env,
+ [[maybe_unused]] jobject thisObj,
jint i1,
jlong l1) {
EXPECT_EQ(i1, 1234);
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index e716502911..cd6aac517d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -323,7 +323,7 @@ ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters()
static_assert(kCalleeSaveRegisters[lr_index].Equals(
Arm64ManagedRegister::FromXRegister(LR)));
return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(
- /*pos*/ lr_index, /*length=*/ 1u);
+ /*pos=*/ lr_index, /*length=*/ 1u);
}
} else {
return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index 2b9da6ba1a..459beb0c67 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -29,6 +29,10 @@
#include "jni/quick/arm64/calling_convention_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "jni/quick/riscv64/calling_convention_riscv64.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
#include "jni/quick/x86/calling_convention_x86.h"
#endif
@@ -61,6 +65,12 @@ std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention
new (allocator) arm64::Arm64ManagedRuntimeCallingConvention(
is_static, is_synchronized, shorty));
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case InstructionSet::kRiscv64:
+ return std::unique_ptr<ManagedRuntimeCallingConvention>(
+ new (allocator) riscv64::Riscv64ManagedRuntimeCallingConvention(
+ is_static, is_synchronized, shorty));
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86:
return std::unique_ptr<ManagedRuntimeCallingConvention>(
@@ -114,7 +124,7 @@ bool ManagedRuntimeCallingConvention::IsCurrentArgPossiblyNull() {
}
size_t ManagedRuntimeCallingConvention::CurrentParamSize() {
- return ParamSize(itr_args_);
+ return ParamSize(itr_args_, /*reference_size=*/ sizeof(mirror::HeapReference<mirror::Object>));
}
bool ManagedRuntimeCallingConvention::IsCurrentParamAReference() {
@@ -156,6 +166,12 @@ std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocato
new (allocator) arm64::Arm64JniCallingConvention(
is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case InstructionSet::kRiscv64:
+ return std::unique_ptr<JniCallingConvention>(
+ new (allocator) riscv64::Riscv64JniCallingConvention(
+ is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86:
return std::unique_ptr<JniCallingConvention>(
@@ -188,7 +204,7 @@ bool JniCallingConvention::HasNext() {
if (IsCurrentArgExtraForJni()) {
return true;
} else {
- unsigned int arg_pos = GetIteratorPositionWithinShorty();
+ size_t arg_pos = GetIteratorPositionWithinShorty();
return arg_pos < NumArgs();
}
}
@@ -220,7 +236,7 @@ bool JniCallingConvention::IsCurrentParamAReference() {
&return_value)) {
return return_value;
} else {
- int arg_pos = GetIteratorPositionWithinShorty();
+ size_t arg_pos = GetIteratorPositionWithinShorty();
return IsParamAReference(arg_pos);
}
}
@@ -242,7 +258,7 @@ bool JniCallingConvention::IsCurrentParamAFloatOrDouble() {
&return_value)) {
return return_value;
} else {
- int arg_pos = GetIteratorPositionWithinShorty();
+ size_t arg_pos = GetIteratorPositionWithinShorty();
return IsParamAFloatOrDouble(arg_pos);
}
}
@@ -256,7 +272,7 @@ bool JniCallingConvention::IsCurrentParamADouble() {
&return_value)) {
return return_value;
} else {
- int arg_pos = GetIteratorPositionWithinShorty();
+ size_t arg_pos = GetIteratorPositionWithinShorty();
return IsParamADouble(arg_pos);
}
}
@@ -270,7 +286,7 @@ bool JniCallingConvention::IsCurrentParamALong() {
&return_value)) {
return return_value;
} else {
- int arg_pos = GetIteratorPositionWithinShorty();
+ size_t arg_pos = GetIteratorPositionWithinShorty();
return IsParamALong(arg_pos);
}
}
@@ -279,8 +295,9 @@ size_t JniCallingConvention::CurrentParamSize() const {
if (IsCurrentArgExtraForJni()) {
return static_cast<size_t>(frame_pointer_size_); // JNIEnv or jobject/jclass
} else {
- int arg_pos = GetIteratorPositionWithinShorty();
- return ParamSize(arg_pos);
+ size_t arg_pos = GetIteratorPositionWithinShorty();
+ // References are converted to `jobject` for the native call. Pass `frame_pointer_size_`.
+ return ParamSize(arg_pos, /*reference_size=*/ static_cast<size_t>(frame_pointer_size_));
}
}
@@ -305,7 +322,7 @@ bool JniCallingConvention::HasSelfClass() const {
}
}
-unsigned int JniCallingConvention::GetIteratorPositionWithinShorty() const {
+size_t JniCallingConvention::GetIteratorPositionWithinShorty() const {
// We need to subtract out the extra JNI arguments if we want to use this iterator position
// with the inherited CallingConvention member functions, which rely on scanning the shorty.
// Note that our shorty does *not* include the JNIEnv, jclass/jobject parameters.
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 0187b14256..2657e943e6 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -178,14 +178,18 @@ class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConventi
size_t NumReferenceArgs() const {
return num_ref_args_;
}
- size_t ParamSize(unsigned int param) const {
+ size_t ParamSize(size_t param, size_t reference_size) const {
DCHECK_LT(param, NumArgs());
if (IsStatic()) {
param++; // 0th argument must skip return value at start of the shorty
} else if (param == 0) {
- return sizeof(mirror::HeapReference<mirror::Object>); // this argument
+ return reference_size; // this argument
}
- size_t result = Primitive::ComponentSize(Primitive::GetType(shorty_[param]));
+ Primitive::Type type = Primitive::GetType(shorty_[param]);
+ if (type == Primitive::kPrimNot) {
+ return reference_size;
+ }
+ size_t result = Primitive::ComponentSize(type);
if (result >= 1 && result < 4) {
result = 4;
}
@@ -344,7 +348,7 @@ class JniCallingConvention : public CallingConvention {
return IsCurrentParamALong() || IsCurrentParamADouble();
}
bool IsCurrentParamJniEnv();
- size_t CurrentParamSize() const;
+ virtual size_t CurrentParamSize() const;
virtual bool IsCurrentParamInRegister() = 0;
virtual bool IsCurrentParamOnStack() = 0;
virtual ManagedRegister CurrentParamRegister() = 0;
@@ -432,7 +436,7 @@ class JniCallingConvention : public CallingConvention {
bool HasSelfClass() const;
// Returns the position of itr_args_, fixed up by removing the offset of extra JNI arguments.
- unsigned int GetIteratorPositionWithinShorty() const;
+ size_t GetIteratorPositionWithinShorty() const;
// Is the current argument (at the iterator) an extra argument for JNI?
bool IsCurrentArgExtraForJni() const;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index c60d97467e..9349d2c9fd 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -154,11 +154,11 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
// -- Don't allow any objects as parameter or return value
if (UNLIKELY(is_critical_native)) {
CHECK(is_static)
- << "@CriticalNative functions cannot be virtual since that would"
+ << "@CriticalNative functions cannot be virtual since that would "
<< "require passing a reference parameter (this), which is illegal "
<< dex_file.PrettyMethod(method_idx, /* with_signature= */ true);
CHECK(!is_synchronized)
- << "@CriticalNative functions cannot be synchronized since that would"
+ << "@CriticalNative functions cannot be synchronized since that would "
<< "require passing a (class and/or this) reference parameter, which is illegal "
<< dex_file.PrettyMethod(method_idx, /* with_signature= */ true);
for (size_t i = 0; i < strlen(shorty); ++i) {
@@ -387,8 +387,8 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
DCHECK(main_jni_conv->HasNext());
static_assert(kObjectReferenceSize == 4u);
bool is_reference = mr_conv->IsCurrentParamAReference();
- size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u;
- size_t dest_size = is_reference ? kRawPointerSize : src_size;
+ size_t src_size = mr_conv->CurrentParamSize();
+ size_t dest_size = main_jni_conv->CurrentParamSize();
src_args.push_back(mr_conv->IsCurrentParamInRegister()
? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size)
: ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size));
@@ -621,7 +621,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
main_jni_conv->CalleeSaveScratchRegisters()[0], kObjectReferenceSize);
// Load the declaring class reference.
DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
- __ Load(temp, method_register, MemberOffset(0u), kObjectReferenceSize);
+ __ LoadGcRootWithoutReadBarrier(temp, method_register, MemberOffset(0u));
// Return to main path if the class object is marked.
__ TestMarkBit(temp, jclass_read_barrier_return.get(), JNIMacroUnaryCondition::kNotZero);
}
@@ -724,7 +724,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
size_t cs = __ CodeSize();
std::vector<uint8_t> managed_code(cs);
MemoryRegion code(&managed_code[0], managed_code.size());
- __ FinalizeInstructions(code);
+ __ CopyInstructions(code);
return JniCompiledMethod(instruction_set,
std::move(managed_code),
diff --git a/compiler/jni/quick/riscv64/calling_convention_riscv64.cc b/compiler/jni/quick/riscv64/calling_convention_riscv64.cc
new file mode 100644
index 0000000000..b083fec14a
--- /dev/null
+++ b/compiler/jni/quick/riscv64/calling_convention_riscv64.cc
@@ -0,0 +1,429 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "calling_convention_riscv64.h"
+
+#include <android-base/logging.h>
+
+#include "arch/instruction_set.h"
+#include "arch/riscv64/jni_frame_riscv64.h"
+#include "utils/riscv64/managed_register_riscv64.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+static constexpr ManagedRegister kXArgumentRegisters[] = {
+ Riscv64ManagedRegister::FromXRegister(A0),
+ Riscv64ManagedRegister::FromXRegister(A1),
+ Riscv64ManagedRegister::FromXRegister(A2),
+ Riscv64ManagedRegister::FromXRegister(A3),
+ Riscv64ManagedRegister::FromXRegister(A4),
+ Riscv64ManagedRegister::FromXRegister(A5),
+ Riscv64ManagedRegister::FromXRegister(A6),
+ Riscv64ManagedRegister::FromXRegister(A7),
+};
+static_assert(kMaxIntLikeArgumentRegisters == arraysize(kXArgumentRegisters));
+
+static const FRegister kFArgumentRegisters[] = {
+ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7
+};
+static_assert(kMaxFloatOrDoubleArgumentRegisters == arraysize(kFArgumentRegisters));
+
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+ // Core registers.
+ Riscv64ManagedRegister::FromXRegister(S0),
+ // ART thread register (TR = S1) is not saved on the stack.
+ Riscv64ManagedRegister::FromXRegister(S2),
+ Riscv64ManagedRegister::FromXRegister(S3),
+ Riscv64ManagedRegister::FromXRegister(S4),
+ Riscv64ManagedRegister::FromXRegister(S5),
+ Riscv64ManagedRegister::FromXRegister(S6),
+ Riscv64ManagedRegister::FromXRegister(S7),
+ Riscv64ManagedRegister::FromXRegister(S8),
+ Riscv64ManagedRegister::FromXRegister(S9),
+ Riscv64ManagedRegister::FromXRegister(S10),
+ Riscv64ManagedRegister::FromXRegister(S11),
+ Riscv64ManagedRegister::FromXRegister(RA),
+
+ // Hard float registers.
+ Riscv64ManagedRegister::FromFRegister(FS0),
+ Riscv64ManagedRegister::FromFRegister(FS1),
+ Riscv64ManagedRegister::FromFRegister(FS2),
+ Riscv64ManagedRegister::FromFRegister(FS3),
+ Riscv64ManagedRegister::FromFRegister(FS4),
+ Riscv64ManagedRegister::FromFRegister(FS5),
+ Riscv64ManagedRegister::FromFRegister(FS6),
+ Riscv64ManagedRegister::FromFRegister(FS7),
+ Riscv64ManagedRegister::FromFRegister(FS8),
+ Riscv64ManagedRegister::FromFRegister(FS9),
+ Riscv64ManagedRegister::FromFRegister(FS10),
+ Riscv64ManagedRegister::FromFRegister(FS11),
+};
+
+template <size_t size>
+static constexpr uint32_t CalculateCoreCalleeSpillMask(
+ const ManagedRegister (&callee_saves)[size]) {
+ uint32_t result = 0u;
+ for (auto&& r : callee_saves) {
+ if (r.AsRiscv64().IsXRegister()) {
+ result |= (1u << r.AsRiscv64().AsXRegister());
+ }
+ }
+ return result;
+}
+
+template <size_t size>
+static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&callee_saves)[size]) {
+ uint32_t result = 0u;
+ for (auto&& r : callee_saves) {
+ if (r.AsRiscv64().IsFRegister()) {
+ result |= (1u << r.AsRiscv64().AsFRegister());
+ }
+ }
+ return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters);
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters);
+
+static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = {
+ // Core registers.
+ Riscv64ManagedRegister::FromXRegister(S0),
+ Riscv64ManagedRegister::FromXRegister(S1),
+ Riscv64ManagedRegister::FromXRegister(S2),
+ Riscv64ManagedRegister::FromXRegister(S3),
+ Riscv64ManagedRegister::FromXRegister(S4),
+ Riscv64ManagedRegister::FromXRegister(S5),
+ Riscv64ManagedRegister::FromXRegister(S6),
+ Riscv64ManagedRegister::FromXRegister(S7),
+ Riscv64ManagedRegister::FromXRegister(S8),
+ Riscv64ManagedRegister::FromXRegister(S9),
+ Riscv64ManagedRegister::FromXRegister(S10),
+ Riscv64ManagedRegister::FromXRegister(S11),
+ Riscv64ManagedRegister::FromXRegister(RA),
+
+ // Hard float registers.
+ Riscv64ManagedRegister::FromFRegister(FS0),
+ Riscv64ManagedRegister::FromFRegister(FS1),
+ Riscv64ManagedRegister::FromFRegister(FS2),
+ Riscv64ManagedRegister::FromFRegister(FS3),
+ Riscv64ManagedRegister::FromFRegister(FS4),
+ Riscv64ManagedRegister::FromFRegister(FS5),
+ Riscv64ManagedRegister::FromFRegister(FS6),
+ Riscv64ManagedRegister::FromFRegister(FS7),
+ Riscv64ManagedRegister::FromFRegister(FS8),
+ Riscv64ManagedRegister::FromFRegister(FS9),
+ Riscv64ManagedRegister::FromFRegister(FS10),
+ Riscv64ManagedRegister::FromFRegister(FS11),
+};
+
+static constexpr uint32_t kNativeCoreCalleeSpillMask =
+ CalculateCoreCalleeSpillMask(kNativeCalleeSaveRegisters);
+static constexpr uint32_t kNativeFpCalleeSpillMask =
+ CalculateFpCalleeSpillMask(kNativeCalleeSaveRegisters);
+
+static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
+ if (shorty[0] == 'F' || shorty[0] == 'D') {
+ return Riscv64ManagedRegister::FromFRegister(FA0);
+ } else if (shorty[0] == 'V') {
+ return Riscv64ManagedRegister::NoRegister();
+ } else {
+ // All other return types use A0. Note that there is no managed type wide enough to use A1/FA1.
+ return Riscv64ManagedRegister::FromXRegister(A0);
+ }
+}
+
+// Managed runtime calling convention
+
+ManagedRegister Riscv64ManagedRuntimeCallingConvention::ReturnRegister() const {
+ return ReturnRegisterForShorty(GetShorty());
+}
+
+ManagedRegister Riscv64ManagedRuntimeCallingConvention::MethodRegister() {
+ return Riscv64ManagedRegister::FromXRegister(A0);
+}
+
+ManagedRegister Riscv64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ DCHECK(!Riscv64ManagedRegister::FromXRegister(A4).Overlaps(ReturnRegister().AsRiscv64()));
+ return Riscv64ManagedRegister::FromXRegister(A4);
+}
+
+bool Riscv64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
+ // Note: The managed ABI does not pass FP args in general purpose registers.
+ // This differs from the native ABI which does that after using all FP arg registers.
+ if (IsCurrentParamAFloatOrDouble()) {
+ return itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters;
+ } else {
+ size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_;
+ return /* method */ 1u + non_fp_arg_number < kMaxIntLikeArgumentRegisters;
+ }
+}
+
+bool Riscv64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
+ return !IsCurrentParamInRegister();
+}
+
+ManagedRegister Riscv64ManagedRuntimeCallingConvention::CurrentParamRegister() {
+ DCHECK(IsCurrentParamInRegister());
+ if (IsCurrentParamAFloatOrDouble()) {
+ return Riscv64ManagedRegister::FromFRegister(kFArgumentRegisters[itr_float_and_doubles_]);
+ } else {
+ size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_;
+ return kXArgumentRegisters[/* method */ 1u + non_fp_arg_number];
+ }
+}
+
+FrameOffset Riscv64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
+ return FrameOffset(displacement_.Int32Value() + // displacement
+ kFramePointerSize + // Method ref
+ (itr_slots_ * sizeof(uint32_t))); // offset into in args
+}
+
+// JNI calling convention
+
+Riscv64JniCallingConvention::Riscv64JniCallingConvention(bool is_static,
+ bool is_synchronized,
+ bool is_fast_native,
+ bool is_critical_native,
+ const char* shorty)
+ : JniCallingConvention(is_static,
+ is_synchronized,
+ is_fast_native,
+ is_critical_native,
+ shorty,
+ kRiscv64PointerSize) {
+}
+
+ManagedRegister Riscv64JniCallingConvention::ReturnRegister() const {
+ return ReturnRegisterForShorty(GetShorty());
+}
+
+ManagedRegister Riscv64JniCallingConvention::IntReturnRegister() const {
+ return Riscv64ManagedRegister::FromXRegister(A0);
+}
+
+size_t Riscv64JniCallingConvention::FrameSize() const {
+ if (is_critical_native_) {
+ CHECK(!SpillsMethod());
+ CHECK(!HasLocalReferenceSegmentState());
+ return 0u; // There is no managed frame for @CriticalNative.
+ }
+
+ // Method*, callee save area size, local reference segment state
+ DCHECK(SpillsMethod());
+ size_t method_ptr_size = static_cast<size_t>(kFramePointerSize);
+ size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+ size_t total_size = method_ptr_size + callee_save_area_size;
+
+ DCHECK(HasLocalReferenceSegmentState());
+ // Cookie is saved in one of the spilled registers.
+
+ return RoundUp(total_size, kStackAlignment);
+}
+
+size_t Riscv64JniCallingConvention::OutFrameSize() const {
+ // Count param args, including JNIEnv* and jclass*.
+ size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
+ size_t num_fp_args = NumFloatOrDoubleArgs();
+ DCHECK_GE(all_args, num_fp_args);
+ size_t num_non_fp_args = all_args - num_fp_args;
+ // The size of outgoing arguments.
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
+
+ // @CriticalNative can use a tail call as all managed callee saves are preserved by the native ABI.
+ static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
+ static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) == 0u);
+
+ // For @CriticalNative, we can make a tail call if there are no stack args.
+ // Otherwise, add space for return PC.
+ // Note: Result does not need to be zero- or sign-extended.
+ DCHECK(!RequiresSmallResultTypeExtension());
+ if (is_critical_native_ && size != 0u) {
+ size += kFramePointerSize; // We need to spill RA with the args.
+ }
+ size_t out_args_size = RoundUp(size, kNativeStackAlignment);
+ if (UNLIKELY(IsCriticalNative())) {
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
+ }
+ return out_args_size;
+}
+
+ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::CalleeSaveRegisters() const {
+ if (UNLIKELY(IsCriticalNative())) {
+ if (UseTailCall()) {
+ return ArrayRef<const ManagedRegister>(); // Do not spill anything.
+ } else {
+ // Spill RA with out args.
+ static_assert((kCoreCalleeSpillMask & (1 << RA)) != 0u); // Contains RA.
+ constexpr size_t ra_index = POPCOUNT(kCoreCalleeSpillMask) - 1u;
+ static_assert(kCalleeSaveRegisters[ra_index].Equals(
+ Riscv64ManagedRegister::FromXRegister(RA)));
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(
+ /*pos=*/ ra_index, /*length=*/ 1u);
+ }
+ } else {
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ }
+}
+
+ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::CalleeSaveScratchRegisters() const {
+ DCHECK(!IsCriticalNative());
+ // Use S3-S11 from managed callee saves. All these registers are also native callee saves.
+ constexpr size_t kStart = 2u;
+ constexpr size_t kLength = 9u;
+ static_assert(kCalleeSaveRegisters[kStart].Equals(Riscv64ManagedRegister::FromXRegister(S3)));
+ static_assert(kCalleeSaveRegisters[kStart + kLength - 1u].Equals(
+ Riscv64ManagedRegister::FromXRegister(S11)));
+ static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(kStart, kLength);
+}
+
+ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::ArgumentScratchRegisters() const {
+ DCHECK(!IsCriticalNative());
+ // Exclude A0 if it's used as a return register.
+ static_assert(kXArgumentRegisters[0].Equals(Riscv64ManagedRegister::FromXRegister(A0)));
+ ArrayRef<const ManagedRegister> scratch_regs(kXArgumentRegisters);
+ Riscv64ManagedRegister return_reg = ReturnRegister().AsRiscv64();
+ auto return_reg_overlaps = [return_reg](ManagedRegister reg) {
+ return return_reg.Overlaps(reg.AsRiscv64());
+ };
+ if (return_reg_overlaps(scratch_regs[0])) {
+ scratch_regs = scratch_regs.SubArray(/*pos=*/ 1u);
+ }
+ DCHECK(std::none_of(scratch_regs.begin(), scratch_regs.end(), return_reg_overlaps));
+ return scratch_regs;
+}
+
+uint32_t Riscv64JniCallingConvention::CoreSpillMask() const {
+ return is_critical_native_ ? 0u : kCoreCalleeSpillMask;
+}
+
+uint32_t Riscv64JniCallingConvention::FpSpillMask() const {
+ return is_critical_native_ ? 0u : kFpCalleeSpillMask;
+}
+
+size_t Riscv64JniCallingConvention::CurrentParamSize() const {
+ if (IsCurrentArgExtraForJni()) {
+ return static_cast<size_t>(frame_pointer_size_); // JNIEnv or jobject/jclass
+ } else {
+ size_t arg_pos = GetIteratorPositionWithinShorty();
+ DCHECK_LT(arg_pos, NumArgs());
+ if (IsStatic()) {
+ ++arg_pos; // 0th argument must skip return value at start of the shorty
+ } else if (arg_pos == 0) {
+ return static_cast<size_t>(kRiscv64PointerSize); // this argument
+ }
+ // The riscv64 native calling convention specifies that integers narrower than XLEN (64)
+ // bits are "widened according to the sign of their type up to 32 bits, then sign-extended
+ // to XLEN bits." Thus, everything other than `float` (which has the high 32 bits undefined)
+ // is passed as 64 bits, whether in register, or on the stack.
+ return (GetShorty()[arg_pos] == 'F') ? 4u : static_cast<size_t>(kRiscv64PointerSize);
+ }
+}
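
    A compact standalone illustration of the size rule described in the comment above
    (shorty letters follow ART's convention; the point is that only 'F' stays at 4 bytes
    while every other argument is passed as 64 bits):

    #include <cstddef>
    #include <cstdio>

    // Per-argument native size for riscv64: integers narrower than XLEN are
    // widened/sign-extended to 64 bits, doubles are 64 bits, and only `float`
    // ('F') stays at 4 bytes (upper 32 bits undefined in a GPR or on the stack).
    constexpr size_t NativeParamSize(char shorty_char) {
      return shorty_char == 'F' ? 4u : 8u;
    }

    int main() {
      const char* shorty = "VIFJD";  // return void; args: int, float, long, double
      for (size_t i = 1; shorty[i] != '\0'; ++i) {
        std::printf("arg %zu ('%c') -> %zu bytes\n", i, shorty[i], NativeParamSize(shorty[i]));
      }
      return 0;
    }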
+
+bool Riscv64JniCallingConvention::IsCurrentParamInRegister() {
+ // FP args use FPRs, then GPRs and only then the stack.
+ if (itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters) {
+ if (IsCurrentParamAFloatOrDouble()) {
+ return true;
+ } else {
+ size_t num_non_fp_args = itr_args_ - itr_float_and_doubles_;
+ return num_non_fp_args < kMaxIntLikeArgumentRegisters;
+ }
+ } else {
+ return (itr_args_ < kMaxFloatOrDoubleArgumentRegisters + kMaxIntLikeArgumentRegisters);
+ }
+}
+
+bool Riscv64JniCallingConvention::IsCurrentParamOnStack() {
+ return !IsCurrentParamInRegister();
+}
+
+ManagedRegister Riscv64JniCallingConvention::CurrentParamRegister() {
+ // FP args use FPRs, then GPRs and only then the stack.
+ CHECK(IsCurrentParamInRegister());
+ if (itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters) {
+ if (IsCurrentParamAFloatOrDouble()) {
+ return Riscv64ManagedRegister::FromFRegister(kFArgumentRegisters[itr_float_and_doubles_]);
+ } else {
+ size_t num_non_fp_args = itr_args_ - itr_float_and_doubles_;
+ DCHECK_LT(num_non_fp_args, kMaxIntLikeArgumentRegisters);
+ return kXArgumentRegisters[num_non_fp_args];
+ }
+ } else {
+ // This argument is in a GPR, whether it's an FP arg or a non-FP arg.
+ DCHECK_LT(itr_args_, kMaxFloatOrDoubleArgumentRegisters + kMaxIntLikeArgumentRegisters);
+ return kXArgumentRegisters[itr_args_ - kMaxFloatOrDoubleArgumentRegisters];
+ }
+}
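
    The assignment policy encoded by the two functions above can be modeled in a few lines.
    This is an illustration only: it takes shorty-style argument letters (the implicit
    JNIEnv*/jclass arguments would simply appear as two leading non-FP entries), and it
    returns register names as strings rather than ManagedRegister values.

    #include <cstdio>
    #include <string>
    #include <vector>

    namespace {

    constexpr int kMaxFpArgRegs = 8;   // FA0-FA7
    constexpr int kMaxGprArgRegs = 8;  // A0-A7

    // FP args take FA0-FA7 first; once those run out they join the non-FP args
    // in A0-A7; whatever is left goes to the stack.
    std::vector<std::string> AssignArgs(const std::string& args) {
      std::vector<std::string> result;
      int itr_args = 0;
      int itr_fp = 0;
      for (char c : args) {
        bool is_fp = (c == 'F' || c == 'D');
        std::string loc;
        if (is_fp && itr_fp < kMaxFpArgRegs) {
          loc = "FA" + std::to_string(itr_fp);
        } else {
          // Non-FP args, and FP args that overflowed the FA registers.
          int gpr_index = (itr_fp < kMaxFpArgRegs) ? (itr_args - itr_fp)
                                                   : (itr_args - kMaxFpArgRegs);
          loc = (gpr_index < kMaxGprArgRegs)
                    ? "A" + std::to_string(gpr_index)
                    : "stack[" + std::to_string(gpr_index - kMaxGprArgRegs) + "]";
        }
        result.push_back(loc);
        ++itr_args;
        if (is_fp) ++itr_fp;
      }
      return result;
    }

    }  // namespace

    int main() {
      // Args F, I, F, I, D -> FA0 A0 FA1 A1 FA2
      for (const std::string& loc : AssignArgs("FIFID")) {
        std::printf("%s ", loc.c_str());
      }
      std::printf("\n");
      return 0;
    }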
+
+FrameOffset Riscv64JniCallingConvention::CurrentParamStackOffset() {
+ CHECK(IsCurrentParamOnStack());
+ // Account for FP arguments passed through FA0-FA7.
+ // All other args are passed through A0-A7 (even FP args) and the stack.
+ size_t num_gpr_and_stack_args =
+ itr_args_ - std::min<size_t>(kMaxFloatOrDoubleArgumentRegisters, itr_float_and_doubles_);
+ size_t args_on_stack =
+ num_gpr_and_stack_args - std::min(kMaxIntLikeArgumentRegisters, num_gpr_and_stack_args);
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
+ return FrameOffset(offset);
+}
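
    To make the arithmetic concrete with illustrative numbers (not taken from this change):
    for the 15th argument (zero-based itr_args_ = 14) of a method with three FP arguments
    seen so far, num_gpr_and_stack_args = 14 - min(8, 3) = 11 and args_on_stack = 11 - 8 = 3,
    so the argument sits three 8-byte slots above the base of the outgoing-argument area
    (displacement_ - OutFrameSize()); the CHECK_LT in the function above guarantees the
    resulting offset still falls inside that area.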
+
+bool Riscv64JniCallingConvention::RequiresSmallResultTypeExtension() const {
+ // RISC-V native calling convention requires values to be returned the way that the first
+ // argument would be passed. Arguments are zero-/sign-extended to 32 bits based on their
+ // type, then sign-extended to 64 bits. This is the same as in the ART managed ABI.
+ // (Not applicable to FP args which are returned in `FA0`. A `float` is NaN-boxed.)
+ return false;
+}
+
+// T0 is neither managed callee-save, nor argument register. It is suitable for use as the
+// locking argument for synchronized methods and hidden argument for @CriticalNative methods.
+static void AssertT0IsNeitherCalleeSaveNorArgumentRegister() {
+ // TODO: Change to static_assert; std::none_of is constexpr since C++20.
+ DCHECK(std::none_of(kCalleeSaveRegisters,
+ kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
+ [](ManagedRegister callee_save) constexpr {
+ return callee_save.Equals(Riscv64ManagedRegister::FromXRegister(T0));
+ }));
+ DCHECK(std::none_of(kXArgumentRegisters,
+ kXArgumentRegisters + std::size(kXArgumentRegisters),
+ [](ManagedRegister arg) { return arg.AsRiscv64().AsXRegister() == T0; }));
+}
+
+ManagedRegister Riscv64JniCallingConvention::LockingArgumentRegister() const {
+ DCHECK(!IsFastNative());
+ DCHECK(!IsCriticalNative());
+ DCHECK(IsSynchronized());
+ AssertT0IsNeitherCalleeSaveNorArgumentRegister();
+ return Riscv64ManagedRegister::FromXRegister(T0);
+}
+
+ManagedRegister Riscv64JniCallingConvention::HiddenArgumentRegister() const {
+ DCHECK(IsCriticalNative());
+ AssertT0IsNeitherCalleeSaveNorArgumentRegister();
+ return Riscv64ManagedRegister::FromXRegister(T0);
+}
+
+// Whether to use tail call (used only for @CriticalNative).
+bool Riscv64JniCallingConvention::UseTailCall() const {
+ CHECK(IsCriticalNative());
+ return OutFrameSize() == 0u;
+}
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/jni/quick/riscv64/calling_convention_riscv64.h b/compiler/jni/quick/riscv64/calling_convention_riscv64.h
new file mode 100644
index 0000000000..5add183f72
--- /dev/null
+++ b/compiler/jni/quick/riscv64/calling_convention_riscv64.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_
+#define ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_
+
+#include "base/enums.h"
+#include "base/macros.h"
+#include "jni/quick/calling_convention.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+class Riscv64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention {
+ public:
+ Riscv64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
+ : ManagedRuntimeCallingConvention(is_static,
+ is_synchronized,
+ shorty,
+ PointerSize::k64) {}
+ ~Riscv64ManagedRuntimeCallingConvention() override {}
+ // Calling convention
+ ManagedRegister ReturnRegister() const override;
+ // Managed runtime calling convention
+ ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
+ bool IsCurrentParamInRegister() override;
+ bool IsCurrentParamOnStack() override;
+ ManagedRegister CurrentParamRegister() override;
+ FrameOffset CurrentParamStackOffset() override;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Riscv64ManagedRuntimeCallingConvention);
+};
+
+class Riscv64JniCallingConvention final : public JniCallingConvention {
+ public:
+ Riscv64JniCallingConvention(bool is_static,
+ bool is_synchronized,
+ bool is_fast_native,
+ bool is_critical_native,
+ const char* shorty);
+ ~Riscv64JniCallingConvention() override {}
+ // Calling convention
+ ManagedRegister ReturnRegister() const override;
+ ManagedRegister IntReturnRegister() const override;
+ // JNI calling convention
+ size_t FrameSize() const override;
+ size_t OutFrameSize() const override;
+ ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
+ ArrayRef<const ManagedRegister> CalleeSaveScratchRegisters() const override;
+ ArrayRef<const ManagedRegister> ArgumentScratchRegisters() const override;
+ uint32_t CoreSpillMask() const override;
+ uint32_t FpSpillMask() const override;
+ size_t CurrentParamSize() const override;
+ bool IsCurrentParamInRegister() override;
+ bool IsCurrentParamOnStack() override;
+ ManagedRegister CurrentParamRegister() override;
+ FrameOffset CurrentParamStackOffset() override;
+ bool RequiresSmallResultTypeExtension() const override;
+
+ // Locking argument register, used to pass the synchronization object for calls
+ // to `JniLockObject()` and `JniUnlockObject()`.
+ ManagedRegister LockingArgumentRegister() const override;
+
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Riscv64JniCallingConvention);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 9d0761d2f7..0f981dd6df 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -117,7 +117,7 @@ ArrayRef<const ManagedRegister> X86_64JniCallingConvention::ArgumentScratchRegis
return scratch_regs;
}
-static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni ATTRIBUTE_UNUSED) {
+static ManagedRegister ReturnRegisterForShorty(const char* shorty, [[maybe_unused]] bool jni) {
if (shorty[0] == 'F' || shorty[0] == 'D') {
return X86_64ManagedRegister::FromXmmRegister(XMM0);
} else if (shorty[0] == 'J') {
diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc
index 22b174fce6..6b62874643 100644
--- a/compiler/linker/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -107,13 +107,13 @@ TEST_F(OutputStreamTest, BufferedFlush) {
flush_called(false) { }
~CheckingOutputStream() override {}
- bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
- size_t byte_count ATTRIBUTE_UNUSED) override {
+ bool WriteFully([[maybe_unused]] const void* buffer,
+ [[maybe_unused]] size_t byte_count) override {
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
- off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) override {
+ off_t Seek([[maybe_unused]] off_t offset, [[maybe_unused]] Whence whence) override {
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 703584c537..9da2bfb8ef 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -20,7 +20,6 @@
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_file_exception_helpers.h"
-#include "quicken_info.h"
namespace art HIDDEN {
@@ -40,9 +39,7 @@ HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph,
local_allocator->Adapter(kArenaAllocGraphBuilder)),
throwing_blocks_(kDefaultNumberOfThrowingBlocks,
local_allocator->Adapter(kArenaAllocGraphBuilder)),
- number_of_branches_(0u),
- quicken_index_for_dex_pc_(std::less<uint32_t>(),
- local_allocator->Adapter(kArenaAllocGraphBuilder)) {}
+ number_of_branches_(0u) {}
HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) {
return MaybeCreateBlockAt(dex_pc, dex_pc);
@@ -147,7 +144,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
HBasicBlock* block = graph_->GetEntryBlock();
graph_->AddBlock(block);
- size_t quicken_index = 0;
bool is_throwing_block = false;
// Calculate the qucikening index here instead of CreateBranchTargets since it's easier to
// calculate in dex_pc order.
@@ -158,8 +154,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
// Check if this dex_pc address starts a new basic block.
HBasicBlock* next_block = GetBlockAt(dex_pc);
if (next_block != nullptr) {
- // We only need quicken index entries for basic block boundaries.
- quicken_index_for_dex_pc_.Put(dex_pc, quicken_index);
if (block != nullptr) {
// Last instruction did not end its basic block but a new one starts here.
// It must have been a block falling through into the next one.
@@ -169,10 +163,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
is_throwing_block = false;
graph_->AddBlock(block);
}
- // Make sure to increment this before the continues.
- if (QuickenInfoTable::NeedsIndexForInstruction(&instruction)) {
- ++quicken_index;
- }
if (block == nullptr) {
// Ignore dead code.
@@ -483,8 +473,4 @@ void HBasicBlockBuilder::BuildIntrinsic() {
body->AddSuccessor(exit_block);
}
-size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const {
- return quicken_index_for_dex_pc_.Get(dex_pc);
-}
-
} // namespace art
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 8668ef8221..1aa9375e5a 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -45,8 +45,6 @@ class HBasicBlockBuilder : public ValueObject {
size_t GetNumberOfBranches() const { return number_of_branches_; }
HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
- size_t GetQuickenIndex(uint32_t dex_pc) const;
-
private:
// Creates a basic block starting at given `dex_pc`.
HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
@@ -83,9 +81,6 @@ class HBasicBlockBuilder : public ValueObject {
ScopedArenaVector<HBasicBlock*> throwing_blocks_;
size_t number_of_branches_;
- // A table to quickly find the quicken index for the first instruction of a basic block.
- ScopedArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_;
-
static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 919abfdc49..c0d4c37659 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1047,14 +1047,14 @@ class BCEVisitor final : public HGraphVisitor {
HDiv* div = nullptr;
int64_t const_divisor = 0;
- if (HMul* mul = instruction->GetRight()->AsMul()) {
+ if (HMul* mul = instruction->GetRight()->AsMulOrNull()) {
if (!mul->GetLeft()->IsDiv() || !mul->GetRight()->IsConstant()) {
return false;
}
div = mul->GetLeft()->AsDiv();
const_divisor = Int64FromConstant(mul->GetRight()->AsConstant());
- } else if (HAdd* add = instruction->GetRight()->AsAdd()) {
- HShl* shl = add->GetRight()->AsShl();
+ } else if (HAdd* add = instruction->GetRight()->AsAddOrNull()) {
+ HShl* shl = add->GetRight()->AsShlOrNull();
if (!is_needed_shl(shl)) {
return false;
}
@@ -1070,8 +1070,8 @@ class BCEVisitor final : public HGraphVisitor {
return false;
}
const_divisor = (1LL << n) + 1;
- } else if (HSub* sub = instruction->GetRight()->AsSub()) {
- HShl* shl = sub->GetLeft()->AsShl();
+ } else if (HSub* sub = instruction->GetRight()->AsSubOrNull()) {
+ HShl* shl = sub->GetLeft()->AsShlOrNull();
if (!is_needed_shl(shl)) {
return false;
}
@@ -1378,8 +1378,7 @@ class BCEVisitor final : public HGraphVisitor {
HInstruction* array_length,
HInstruction* base,
int32_t min_c, int32_t max_c) {
- HBoundsCheck* bounds_check =
- first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck();
+ HBoundsCheck* bounds_check = first_index_bounds_check_map_.Get(array_length->GetId());
// Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
// for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
// and base+3, since we made the assumption any in between value may occur too.
diff --git a/compiler/optimizing/code_generation_data.cc b/compiler/optimizing/code_generation_data.cc
new file mode 100644
index 0000000000..7b23d46dc5
--- /dev/null
+++ b/compiler/optimizing/code_generation_data.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_linker.h"
+#include "code_generation_data.h"
+#include "code_generator.h"
+#include "intern_table.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+
+namespace art HIDDEN {
+
+void CodeGenerationData::EmitJitRoots(
+ /*out*/std::vector<Handle<mirror::Object>>* roots) {
+ DCHECK(roots->empty());
+ roots->reserve(GetNumberOfJitRoots());
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ size_t index = 0;
+ for (auto& entry : jit_string_roots_) {
+ // Update `roots` with the string, and replace the address temporarily
+ // stored in the table with the string's index.
+ uint64_t address = entry.second;
+ roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
+ DCHECK(roots->back() != nullptr);
+ DCHECK(roots->back()->IsString());
+ entry.second = index;
+ // Ensure the string is strongly interned. This is a requirement on how the JIT
+ // handles strings. b/32995596
+ class_linker->GetInternTable()->InternStrong(roots->back()->AsString());
+ ++index;
+ }
+ for (auto& entry : jit_class_roots_) {
+ // Update `roots` with the class, and replace the address temporarily
+ // stored in the table with the class's index.
+ uint64_t address = entry.second;
+ roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
+ DCHECK(roots->back() != nullptr);
+ DCHECK(roots->back()->IsClass());
+ entry.second = index;
+ ++index;
+ }
+}
+
+} // namespace art
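
    The table mutation pattern used by EmitJitRoots (stash a pointer in the value slot during
    compilation, then overwrite it with the emitted index) can be sketched independently of
    ART's types. std::map and plain pointers stand in for the arena map and GC handles; this
    is an illustration of the pattern, not the runtime's data model.

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    namespace {

    // Stand-ins for StringReference keys and Handle<mirror::Object> roots.
    using Key = std::string;
    using Root = const std::string*;

    // Phase 1 (compilation): remember the root by address in the value slot.
    // Phase 2 (emission): append each root to `roots` and replace the stored
    // address with the root's index in that vector.
    void EmitRoots(std::map<Key, uint64_t>& table, std::vector<Root>& roots) {
      uint64_t index = roots.size();
      for (auto& entry : table) {
        roots.push_back(reinterpret_cast<Root>(entry.second));
        entry.second = index++;
      }
    }

    }  // namespace

    int main() {
      static const std::string kFoo = "foo";
      static const std::string kBar = "bar";
      std::map<Key, uint64_t> string_roots;
      string_roots["dex0:str1"] = reinterpret_cast<uint64_t>(&kFoo);
      string_roots["dex0:str2"] = reinterpret_cast<uint64_t>(&kBar);

      std::vector<Root> roots;
      EmitRoots(string_roots, roots);
      for (const auto& entry : string_roots) {
        std::printf("%s -> index %llu (%s)\n",
                    entry.first.c_str(),
                    static_cast<unsigned long long>(entry.second),
                    roots[entry.second]->c_str());
      }
      return 0;
    }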
diff --git a/compiler/optimizing/code_generation_data.h b/compiler/optimizing/code_generation_data.h
new file mode 100644
index 0000000000..e78ba8f574
--- /dev/null
+++ b/compiler/optimizing/code_generation_data.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_
+
+#include <memory>
+
+#include "arch/instruction_set.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
+#include "code_generator.h"
+#include "dex/string_reference.h"
+#include "dex/type_reference.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "mirror/string.h"
+#include "stack_map_stream.h"
+
+namespace art HIDDEN {
+
+class CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> {
+ public:
+ static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack,
+ InstructionSet instruction_set) {
+ ScopedArenaAllocator allocator(arena_stack);
+ void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator);
+ return std::unique_ptr<CodeGenerationData>(
+ ::new (memory) CodeGenerationData(std::move(allocator), instruction_set));
+ }
+
+ ScopedArenaAllocator* GetScopedAllocator() {
+ return &allocator_;
+ }
+
+ void AddSlowPath(SlowPathCode* slow_path) {
+ slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path));
+ }
+
+ ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const {
+ return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_);
+ }
+
+ StackMapStream* GetStackMapStream() { return &stack_map_stream_; }
+
+ void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) {
+ jit_string_roots_.Overwrite(string_reference,
+ reinterpret_cast64<uint64_t>(string.GetReference()));
+ }
+
+ uint64_t GetJitStringRootIndex(StringReference string_reference) const {
+ return jit_string_roots_.Get(string_reference);
+ }
+
+ size_t GetNumberOfJitStringRoots() const {
+ return jit_string_roots_.size();
+ }
+
+ void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) {
+ jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference()));
+ }
+
+ uint64_t GetJitClassRootIndex(TypeReference type_reference) const {
+ return jit_class_roots_.Get(type_reference);
+ }
+
+ size_t GetNumberOfJitClassRoots() const {
+ return jit_class_roots_.size();
+ }
+
+ size_t GetNumberOfJitRoots() const {
+ return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots();
+ }
+
+ void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+ CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set)
+ : allocator_(std::move(allocator)),
+ stack_map_stream_(&allocator_, instruction_set),
+ slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)),
+ jit_string_roots_(StringReferenceValueComparator(),
+ allocator_.Adapter(kArenaAllocCodeGenerator)),
+ jit_class_roots_(TypeReferenceValueComparator(),
+ allocator_.Adapter(kArenaAllocCodeGenerator)) {
+ slow_paths_.reserve(kDefaultSlowPathsCapacity);
+ }
+
+ static constexpr size_t kDefaultSlowPathsCapacity = 8;
+
+ ScopedArenaAllocator allocator_;
+ StackMapStream stack_map_stream_;
+ ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
+
+ // Maps a StringReference (dex_file, string_index) to the index in the literal table.
+ // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
+ // will compute all the indices.
+ ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
+
+ // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
+ // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
+ // will compute all the indices.
+ ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index c9f42b52f5..404a42771f 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -44,6 +44,7 @@
#include "base/leb128.h"
#include "class_linker.h"
#include "class_root-inl.h"
+#include "code_generation_data.h"
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "graph_visualizer.h"
@@ -141,122 +142,6 @@ static bool CheckTypeConsistency(HInstruction* instruction) {
return true;
}
-class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> {
- public:
- static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack,
- InstructionSet instruction_set) {
- ScopedArenaAllocator allocator(arena_stack);
- void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator);
- return std::unique_ptr<CodeGenerationData>(
- ::new (memory) CodeGenerationData(std::move(allocator), instruction_set));
- }
-
- ScopedArenaAllocator* GetScopedAllocator() {
- return &allocator_;
- }
-
- void AddSlowPath(SlowPathCode* slow_path) {
- slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path));
- }
-
- ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const {
- return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_);
- }
-
- StackMapStream* GetStackMapStream() { return &stack_map_stream_; }
-
- void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) {
- jit_string_roots_.Overwrite(string_reference,
- reinterpret_cast64<uint64_t>(string.GetReference()));
- }
-
- uint64_t GetJitStringRootIndex(StringReference string_reference) const {
- return jit_string_roots_.Get(string_reference);
- }
-
- size_t GetNumberOfJitStringRoots() const {
- return jit_string_roots_.size();
- }
-
- void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) {
- jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference()));
- }
-
- uint64_t GetJitClassRootIndex(TypeReference type_reference) const {
- return jit_class_roots_.Get(type_reference);
- }
-
- size_t GetNumberOfJitClassRoots() const {
- return jit_class_roots_.size();
- }
-
- size_t GetNumberOfJitRoots() const {
- return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots();
- }
-
- void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
- private:
- CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set)
- : allocator_(std::move(allocator)),
- stack_map_stream_(&allocator_, instruction_set),
- slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)),
- jit_string_roots_(StringReferenceValueComparator(),
- allocator_.Adapter(kArenaAllocCodeGenerator)),
- jit_class_roots_(TypeReferenceValueComparator(),
- allocator_.Adapter(kArenaAllocCodeGenerator)) {
- slow_paths_.reserve(kDefaultSlowPathsCapacity);
- }
-
- static constexpr size_t kDefaultSlowPathsCapacity = 8;
-
- ScopedArenaAllocator allocator_;
- StackMapStream stack_map_stream_;
- ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
-
- // Maps a StringReference (dex_file, string_index) to the index in the literal table.
- // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
- // will compute all the indices.
- ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
-
- // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
- // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
- // will compute all the indices.
- ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
-};
-
-void CodeGenerator::CodeGenerationData::EmitJitRoots(
- /*out*/std::vector<Handle<mirror::Object>>* roots) {
- DCHECK(roots->empty());
- roots->reserve(GetNumberOfJitRoots());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- size_t index = 0;
- for (auto& entry : jit_string_roots_) {
- // Update the `roots` with the string, and replace the address temporarily
- // stored to the index in the table.
- uint64_t address = entry.second;
- roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
- DCHECK(roots->back() != nullptr);
- DCHECK(roots->back()->IsString());
- entry.second = index;
- // Ensure the string is strongly interned. This is a requirement on how the JIT
- // handles strings. b/32995596
- class_linker->GetInternTable()->InternStrong(roots->back()->AsString());
- ++index;
- }
- for (auto& entry : jit_class_roots_) {
- // Update the `roots` with the class, and replace the address temporarily
- // stored to the index in the table.
- uint64_t address = entry.second;
- roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
- DCHECK(roots->back() != nullptr);
- DCHECK(roots->back()->IsClass());
- entry.second = index;
- ++index;
- }
-}
-
ScopedArenaAllocator* CodeGenerator::GetScopedAllocator() {
DCHECK(code_generation_data_ != nullptr);
return code_generation_data_->GetScopedAllocator();
@@ -288,8 +173,8 @@ uint64_t CodeGenerator::GetJitClassRootIndex(TypeReference type_reference) {
return code_generation_data_->GetJitClassRootIndex(type_reference);
}
-void CodeGenerator::EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
- const uint8_t* roots_data ATTRIBUTE_UNUSED) {
+void CodeGenerator::EmitJitRootPatches([[maybe_unused]] uint8_t* code,
+ [[maybe_unused]] const uint8_t* roots_data) {
DCHECK(code_generation_data_ != nullptr);
DCHECK_EQ(code_generation_data_->GetNumberOfJitStringRoots(), 0u);
DCHECK_EQ(code_generation_data_->GetNumberOfJitClassRoots(), 0u);
@@ -378,7 +263,7 @@ void CodeGenerator::InitializeCodeGenerationData() {
code_generation_data_ = CodeGenerationData::Create(graph_->GetArenaStack(), GetInstructionSet());
}
-void CodeGenerator::Compile(CodeAllocator* allocator) {
+void CodeGenerator::Compile() {
InitializeCodeGenerationData();
// The register allocator already called `InitializeCodeGeneration`,
@@ -394,7 +279,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
fpu_spill_mask_,
GetGraph()->GetNumberOfVRegs(),
GetGraph()->IsCompilingBaseline(),
- GetGraph()->IsDebuggable());
+ GetGraph()->IsDebuggable(),
+ GetGraph()->HasShouldDeoptimizeFlag());
size_t frame_start = GetAssembler()->CodeSize();
GenerateFrameEntry();
@@ -443,32 +329,28 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
}
// Finalize instructions in assember;
- Finalize(allocator);
+ Finalize();
GetStackMapStream()->EndMethod(GetAssembler()->CodeSize());
}
-void CodeGenerator::Finalize(CodeAllocator* allocator) {
- size_t code_size = GetAssembler()->CodeSize();
- uint8_t* buffer = allocator->Allocate(code_size);
-
- MemoryRegion code(buffer, code_size);
- GetAssembler()->FinalizeInstructions(code);
+void CodeGenerator::Finalize() {
+ GetAssembler()->FinalizeCode();
}
void CodeGenerator::EmitLinkerPatches(
- ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] ArenaVector<linker::LinkerPatch>* linker_patches) {
// No linker patches by default.
}
-bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const {
+bool CodeGenerator::NeedsThunkCode([[maybe_unused]] const linker::LinkerPatch& patch) const {
// Code generators that create patches requiring thunk compilation should override this function.
return false;
}
-void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
- /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED,
- /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) {
+void CodeGenerator::EmitThunkCode([[maybe_unused]] const linker::LinkerPatch& patch,
+ [[maybe_unused]] /*out*/ ArenaVector<uint8_t>* code,
+ [[maybe_unused]] /*out*/ std::string* debug_name) {
// Code generators that create patches requiring thunk compilation should override this function.
LOG(FATAL) << "Unexpected call to EmitThunkCode().";
}
@@ -745,8 +627,8 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary(
locations->SetOut(calling_convention.GetReturnLocation(field_type));
}
} else {
- size_t set_index = is_instance ? 1 : 0;
- if (DataType::IsFloatingPointType(field_type)) {
+ size_t set_index = is_instance ? 1 : 0;
+ if (DataType::IsFloatingPointType(field_type)) {
// The set value comes from a float location while the calling convention
// expects it in a regular register location. Allocate a temp for it and
// make the transfer at codegen.
@@ -1028,6 +910,12 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats));
}
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case InstructionSet::kRiscv64: {
+ return std::unique_ptr<CodeGenerator>(
+ new (allocator) riscv64::CodeGeneratorRISCV64(graph, compiler_options, stats));
+ }
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
return std::unique_ptr<CodeGenerator>(
@@ -1834,8 +1722,8 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary*
void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -1845,7 +1733,7 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
}
// The length must be >= 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9872efaa4a..cd44fb3fa7 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -59,8 +59,12 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
-static const ReadBarrierOption gCompilerReadBarrierOption =
- gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+// Depending on the configuration, `gUseReadBarrier` can be a static const variable.
+// Static variable initialization order across different compilation units is not defined,
+// so a function is used here instead of a static variable `gCompilerReadBarrierOption`.
+inline ReadBarrierOption GetCompilerReadBarrierOption() {
+ return gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+}
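
    The motivation in the comment is the classic static-initialization-order problem; a
    minimal standalone illustration of why deferring the read to a function is safer than
    caching it in a namespace-scope static constant (all identifiers below are hypothetical,
    not ART's):

    #include <cstdio>

    namespace config {
    // Imagine this is dynamically initialized in another translation unit.
    extern const bool gUseFeature;
    const bool gUseFeature = true;
    }  // namespace config

    enum class Mode { kWith, kWithout };

    // Hazardous pattern: if this object lived in a different translation unit from
    // `gUseFeature`, its initializer could run first and capture a stale `false`,
    // because cross-TU dynamic initialization order is unspecified.
    // static const Mode gMode = config::gUseFeature ? Mode::kWith : Mode::kWithout;

    // Safer pattern: defer the read to the point of use.
    inline Mode GetMode() {
      return config::gUseFeature ? Mode::kWith : Mode::kWithout;
    }

    int main() {
      std::printf("%s\n", GetMode() == Mode::kWith ? "with" : "without");
      return 0;
    }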
constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
constexpr size_t status_byte_offset =
@@ -73,6 +77,7 @@ constexpr uint32_t shifted_initialized_value =
enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
class Assembler;
+class CodeGenerationData;
class CodeGenerator;
class CompilerOptions;
class StackMapStream;
@@ -82,18 +87,6 @@ namespace linker {
class LinkerPatch;
} // namespace linker
-class CodeAllocator {
- public:
- CodeAllocator() {}
- virtual ~CodeAllocator() {}
-
- virtual uint8_t* Allocate(size_t size) = 0;
- virtual ArrayRef<const uint8_t> GetMemory() const = 0;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
-};
-
class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
public:
explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
@@ -200,7 +193,7 @@ class FieldAccessCallingConvention {
class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
public:
// Compiles the graph to executable instructions.
- void Compile(CodeAllocator* allocator);
+ void Compile();
static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
@@ -221,7 +214,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
}
virtual void Initialize() = 0;
- virtual void Finalize(CodeAllocator* allocator);
+ virtual void Finalize();
virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
virtual void EmitThunkCode(const linker::LinkerPatch& patch,
@@ -278,14 +271,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
}
- static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
- uint32_t mask = 0;
- for (size_t i = 0, e = length; i < e; ++i) {
- mask |= (1 << registers[i]);
- }
- return mask;
- }
-
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
@@ -731,6 +716,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
static ScaleFactor ScaleFactorForType(DataType::Type type);
+ ArrayRef<const uint8_t> GetCode() const {
+ return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(),
+ GetAssembler().CodeSize());
+ }
+
protected:
// Patch info used for recording locations of required linker patches and their targets,
// i.e. target method, string, type or code identified by their dex file and index,
@@ -761,6 +751,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
+ template <typename RegType>
+ static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
+ uint32_t mask = 0;
+ for (size_t i = 0, e = length; i < e; ++i) {
+ mask |= (1 << registers[i]);
+ }
+ return mask;
+ }
+
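
    ComputeRegisterMask simply ORs together one bit per register code; a quick standalone
    check of the arithmetic (the register codes below are arbitrary examples):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Same arithmetic as the templated helper above, written out standalone.
    template <typename RegType>
    uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
      uint32_t mask = 0u;
      for (size_t i = 0; i < length; ++i) {
        mask |= (1u << registers[i]);
      }
      return mask;
    }

    int main() {
      const int regs[] = {0, 5, 30};  // bits 0, 5 and 30 set
      std::printf("0x%08x\n", ComputeRegisterMask(regs, 3));  // prints 0x40000021
      return 0;
    }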
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer.
uint32_t GetFpuSpillStart() const {
@@ -814,6 +813,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
StackMapStream* GetStackMapStream();
+ CodeGenerationData* GetCodeGenerationData() {
+ return code_generation_data_.get();
+ }
+
void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
uint64_t GetJitStringRootIndex(StringReference string_reference);
void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
@@ -848,8 +851,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
DisassemblyInformation* disasm_info_;
private:
- class CodeGenerationData;
-
void InitializeCodeGenerationData();
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 41db9a2542..89172aaebc 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -35,6 +35,7 @@
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
@@ -45,6 +46,7 @@
#include "optimizing/common_arm64.h"
#include "optimizing/nodes.h"
#include "thread.h"
+#include "trace.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -88,6 +90,9 @@ using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
+// TODO(mythria): Expand SystemRegister in vixl to include this value.
+uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value;
+
// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
// generates less code/data with a small num_entries.
@@ -936,6 +941,7 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -950,15 +956,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
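
    The construct above is a standard X-macro expansion: each intrinsic name in the list
    macro is pasted into one initializer of the boolean table. A reduced, self-contained
    sketch of the pattern follows; the list and the trait specialization are made up for
    illustration and are not ART's actual intrinsics list.

    #include <cstdio>

    // Hypothetical stand-ins for ART_INTRINSICS_LIST and the IsUnimplemented trait.
    #define MY_INTRINSICS_LIST(V) \
      V(MathAbs)                  \
      V(StringLength)             \
      V(UnsafeCas)

    enum class Intrinsics { kNone, kMathAbs, kStringLength, kUnsafeCas };

    template <Intrinsics T>
    struct IsUnimplemented { bool is_unimplemented = false; };

    // Pretend only UnsafeCas lacks a handcrafted implementation.
    template <>
    struct IsUnimplemented<Intrinsics::kUnsafeCas> { bool is_unimplemented = true; };

    static constexpr bool kIsIntrinsicUnimplemented[] = {
        false,  // kNone
    #define IS_UNIMPLEMENTED(Intrinsic) IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
        MY_INTRINSICS_LIST(IS_UNIMPLEMENTED)
    #undef IS_UNIMPLEMENTED
    };

    int main() {
      for (bool b : kIsIntrinsicUnimplemented) {
        std::printf("%d ", b ? 1 : 0);
      }
      std::printf("\n");  // prints: 0 0 0 1
      return 0;
    }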
@@ -995,14 +999,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- uint32_literals_(std::less<uint32_t>(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- uint64_literals_(std::less<uint64_t>(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- jit_string_patches_(StringReferenceValueComparator(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- jit_class_patches_(TypeReferenceValueComparator(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_patches_(&assembler_, graph->GetAllocator()),
jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
// Save the link register (containing the return address) to mimic Quick.
@@ -1036,7 +1033,7 @@ void CodeGeneratorARM64::EmitJumpTables() {
}
}
-void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorARM64::Finalize() {
EmitJumpTables();
// Emit JIT baker read barrier slow paths.
@@ -1051,11 +1048,11 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
// Ensure we emit the literal pool.
__ FinalizeCode();
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
// Verify Baker read barrier linker patches.
if (kIsDebugBuild) {
- ArrayRef<const uint8_t> code = allocator->GetMemory();
+ ArrayRef<const uint8_t> code(GetCode());
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
DCHECK(info.label.IsBound());
uint32_t literal_offset = info.label.GetLocation();
@@ -1192,8 +1189,9 @@ void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
- Register temp = temps.AcquireX();
- Register value = temps.AcquireW();
+ Register addr = temps.AcquireX();
+ Register index = temps.AcquireX();
+ Register value = index.W();
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
@@ -1213,9 +1211,44 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in
MemberOffset offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
- __ Mov(temp, address + offset.Int32Value());
- __ Ldrb(value, MemOperand(temp, 0));
- __ Cbnz(value, slow_path->GetEntryLabel());
+ __ Mov(addr, address + offset.Int32Value());
+ __ Ldrb(value, MemOperand(addr, 0));
+ __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners));
+ // Check if there are any method entry / exit listeners. If not, continue.
+ __ B(lt, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ __ B(gt, slow_path->GetEntryLabel());
+
+ // Check if there is space in the buffer to store a new entry; if not, take the slow path.
+ uint32_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value();
+ __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
+ __ Subs(index, index, kNumEntriesForWallClock);
+ __ B(lt, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ Str(index, MemOperand(tr, trace_buffer_index_offset));
+ // Calculate the entry address in the buffer.
+ // addr = base_addr + sizeof(void*) * index;
+ __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
+ __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8));
+
+ Register tmp = index;
+ // Record method pointer and trace action.
+ __ Ldr(tmp, MemOperand(sp, 0));
+ // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
+ // so there is no need to set the bits since they are already 0.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
+ // Record the timestamp.
+ __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0);
+ __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes));
__ Bind(slow_path->GetExitLabel());
}
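
    In C-like terms, the fast path emitted above does roughly the following per call. This
    is a hedged model: the field names, the listener threshold, and the two-slot entry
    layout are simplifications of what the generated code manipulates, and all slow-path
    cases are reduced to a bool return.

    #include <cstdint>

    namespace {

    // Hypothetical flattening of the per-thread trace state touched by the code:
    // a slot index that counts down and a buffer of (method, timestamp) pairs.
    struct TraceState {
      uint8_t have_listeners;  // 0: none, == threshold: fast tracing, >: slow listeners
      intptr_t buffer_index;   // in slots, counts down toward 0
      uint64_t* buffer;        // base of the trace buffer
    };

    constexpr uint8_t kFastTraceListeners = 1;       // assumed threshold, mirrors the Cmp above
    constexpr intptr_t kNumEntriesForWallClock = 2;  // method slot + timestamp slot
    constexpr uint64_t kTraceMethodExit = 1;         // low bits encode the trace action

    uint64_t ReadTimestampCounter() { return 0; /* stands in for MRS CNTVCT_EL0 */ }

    // Returns false when the runtime slow path must handle the event instead.
    bool RecordEntryExitFast(TraceState* state, uint64_t method_ptr, bool is_exit) {
      if (state->have_listeners < kFastTraceListeners) return true;   // nothing to do
      if (state->have_listeners > kFastTraceListeners) return false;  // slow listeners
      intptr_t index = state->buffer_index - kNumEntriesForWallClock;
      if (index < 0) return false;                                    // buffer full
      state->buffer_index = index;
      uint64_t* entry = state->buffer + index;
      entry[0] = method_ptr | (is_exit ? kTraceMethodExit : 0u);      // method + action
      entry[1] = ReadTimestampCounter();                              // timestamp
      return true;
    }

    }  // namespace

    int main() {
      uint64_t buffer[8] = {};
      TraceState state{kFastTraceListeners, 8, buffer};
      RecordEntryExitFast(&state, /*method_ptr=*/0x1000, /*is_exit=*/true);
      return 0;
    }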
@@ -1264,7 +1297,7 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireX();
Register counter = temps.AcquireW();
- __ Ldr(temp, DeduplicateUint64Literal(address));
+ __ Ldr(temp, jit_patches_.DeduplicateUint64Literal(address));
__ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Cbz(counter, slow_path->GetEntryLabel());
__ Add(counter, counter, -1);
@@ -1532,15 +1565,15 @@ size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_
return kArm64WordSize;
}
-size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
<< "use SaveRestoreLiveRegistersHelper";
UNREACHABLE();
}
-size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
<< "use SaveRestoreLiveRegistersHelper";
UNREACHABLE();
@@ -3647,7 +3680,7 @@ void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
}
void InstructionCodeGeneratorARM64::VisitDoubleConstant(
- HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -3655,8 +3688,7 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {}
void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
LocationSummary* locations =
@@ -3664,7 +3696,7 @@ void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -3747,7 +3779,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
// The condition instruction has been materialized, compare the output to 0.
Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
DCHECK(cond_val.IsRegister());
- if (true_target == nullptr) {
+ if (true_target == nullptr) {
__ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
} else {
__ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
@@ -3876,7 +3908,7 @@ static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
}
static inline Condition GetConditionForSelect(HCondition* condition) {
- IfCondition cond = condition->AsCondition()->GetCondition();
+ IfCondition cond = condition->GetCondition();
return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
: ARM64Condition(cond);
}
@@ -3888,8 +3920,8 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) {
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
- HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+ HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull();
+ HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull();
bool is_true_value_constant = cst_true_value != nullptr;
bool is_false_value_constant = cst_false_value != nullptr;
// Ask VIXL whether we should synthesize constants in registers.
@@ -4308,7 +4340,6 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
- // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -4478,12 +4509,11 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- maybe_temp2_loc,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
// Loop through the iftable and check if any class matches.
@@ -4525,7 +4555,7 @@ void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -4534,7 +4564,7 @@ void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -4709,8 +4739,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ [[maybe_unused]] ArtMethod* method) {
// On ARM64 we support all dispatch types.
return desired_dispatch_info;
}
@@ -4749,7 +4779,8 @@ void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HIn
case MethodLoadKind::kJitDirectAddress: {
// Load method address from literal pool.
__ Ldr(XRegisterFrom(temp),
- DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
+ jit_patches_.DeduplicateUint64Literal(
+ reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
break;
}
case MethodLoadKind::kRuntimeCall: {
@@ -4775,14 +4806,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
__ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
break;
}
- case MethodLoadKind::kRecursive: {
+ case MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
- }
- case MethodLoadKind::kRuntimeCall: {
+ case MethodLoadKind::kRuntimeCall:
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
- }
case MethodLoadKind::kBootImageLinkTimePcRelative:
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
@@ -4798,10 +4827,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
break;
}
FALLTHROUGH_INTENDED;
- default: {
+ default:
LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
break;
- }
}
auto call_lr = [&]() {
@@ -4906,6 +4934,7 @@ void CodeGeneratorARM64::GenerateVirtualCall(
}
// Instead of simply (possibly) unpoisoning `temp` here, we should
// emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
// intermediate/temporary reference and because the current
// concurrent copying collector keeps the from-space memory
// intact/accessible until the end of the marking phase (the
@@ -5090,25 +5119,8 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
return label;
}
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
- uint64_t address) {
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
- const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
- ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
- return jit_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
- const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
- ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
- return jit_class_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
+void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+ jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData());
}
void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -5332,19 +5344,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
assembler.FinalizeCode();
code->resize(assembler.CodeSize());
MemoryRegion code_region(code->data(), code->size());
- assembler.FinalizeInstructions(code_region);
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
- return uint32_literals_.GetOrCreate(
- value,
- [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
-}
-
-vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
- return uint64_literals_.GetOrCreate(
- value,
- [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
+ assembler.CopyInstructions(code_region);
}
void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -5370,13 +5370,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
return;
}
- {
- // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
- DCHECK(!codegen_->IsLeafMethod());
- }
+ codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+ DCHECK(!codegen_->IsLeafMethod());
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
@@ -5434,7 +5429,9 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
- if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
@@ -5460,9 +5457,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
Location out_loc = cls->GetLocations()->Out();
Register out = OutputRegister(cls);
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption();
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5600,7 +5596,7 @@ void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) {
__ Str(wzr, GetExceptionTlsAddress());
}
@@ -5685,7 +5681,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
temp,
/* offset placeholder */ 0u,
ldr_label,
- gCompilerReadBarrierOption);
+ GetCompilerReadBarrierOption());
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
@@ -5709,14 +5705,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
out.X(),
/* offset= */ 0,
/* fixup_label= */ nullptr,
- gCompilerReadBarrierOption);
+ GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
InvokeRuntimeCallingConvention calling_convention;
DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
__ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
@@ -5730,7 +5725,7 @@ void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -5930,7 +5925,7 @@ void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
HandleBinaryOp(instruction);
}
-void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -5957,7 +5952,7 @@ void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
}
void InstructionCodeGeneratorARM64::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HParameterValue* instruction) {
// Nothing to do, the parameter is already at its location.
}
@@ -5968,7 +5963,7 @@ void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
}
void InstructionCodeGeneratorARM64::VisitCurrentMethod(
- HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HCurrentMethod* instruction) {
// Nothing to do, the method is already at its location.
}
@@ -5980,7 +5975,7 @@ void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -6175,7 +6170,7 @@ void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor
}
void InstructionCodeGeneratorARM64::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -6215,7 +6210,7 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
instruction->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
codegen_->GenerateFrameExit();
}
@@ -6353,6 +6348,9 @@ void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
// In suspend check slow path, usually there are no caller-save registers at all.
// If SIMD instructions are present, however, we force spilling all live SIMD
// registers in full width (since the runtime only saves/restores lower part).
+ // Note that only a suspend check can see live SIMD registers. In the
+ // loop optimization, we make sure this does not happen for any other slow
+ // path.
locations->SetCustomSlowPathCallerSaves(
GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
@@ -6467,12 +6465,12 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
HandleBinaryOp(instruction);
}
-void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
@@ -7018,32 +7016,6 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi
}
}
-static void PatchJitRootUse(uint8_t* code,
- const uint8_t* roots_data,
- vixl::aarch64::Literal<uint32_t>* literal,
- uint64_t index_in_table) {
- uint32_t literal_offset = literal->GetOffset();
- uintptr_t address =
- reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
- uint8_t* data = code + literal_offset;
- reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
-}
-
-void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
- for (const auto& entry : jit_string_patches_) {
- const StringReference& string_reference = entry.first;
- vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
- uint64_t index_in_table = GetJitStringRootIndex(string_reference);
- PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
- }
- for (const auto& entry : jit_class_patches_) {
- const TypeReference& type_reference = entry.first;
- vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
- uint64_t index_in_table = GetJitClassRootIndex(type_reference);
- PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
- }
-}
-
MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
HVecMemoryOperation* instruction,
UseScratchRegisterScope* temps_scope,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 6190364d1d..957f85aa21 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -26,6 +26,7 @@
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "driver/compiler_options.h"
+#include "jit_patches_arm64.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"
@@ -50,30 +51,29 @@ class CodeGeneratorARM64;
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
-// These constants are used as an approximate margin when emission of veneer and literal pools
+// This constant is used as an approximate margin when emission of veneer and literal pools
// must be blocked.
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
-static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
static const vixl::aarch64::Register kParameterCoreRegisters[] = {
- vixl::aarch64::x1,
- vixl::aarch64::x2,
- vixl::aarch64::x3,
- vixl::aarch64::x4,
- vixl::aarch64::x5,
- vixl::aarch64::x6,
- vixl::aarch64::x7
+ vixl::aarch64::x1,
+ vixl::aarch64::x2,
+ vixl::aarch64::x3,
+ vixl::aarch64::x4,
+ vixl::aarch64::x5,
+ vixl::aarch64::x6,
+ vixl::aarch64::x7
};
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static const vixl::aarch64::VRegister kParameterFPRegisters[] = {
- vixl::aarch64::d0,
- vixl::aarch64::d1,
- vixl::aarch64::d2,
- vixl::aarch64::d3,
- vixl::aarch64::d4,
- vixl::aarch64::d5,
- vixl::aarch64::d6,
- vixl::aarch64::d7
+ vixl::aarch64::d0,
+ vixl::aarch64::d1,
+ vixl::aarch64::d2,
+ vixl::aarch64::d3,
+ vixl::aarch64::d4,
+ vixl::aarch64::d5,
+ vixl::aarch64::d6,
+ vixl::aarch64::d7
};
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
@@ -116,7 +116,7 @@ const vixl::aarch64::CPURegList callee_saved_core_registers(
vixl::aarch64::CPURegister::kRegister,
vixl::aarch64::kXRegSize,
(kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()),
- vixl::aarch64::x30.GetCode());
+ vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister,
vixl::aarch64::kDRegSize,
vixl::aarch64::d8.GetCode(),
@@ -192,34 +192,34 @@ class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
};
-static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
- { vixl::aarch64::x0,
- vixl::aarch64::x1,
- vixl::aarch64::x2,
- vixl::aarch64::x3,
- vixl::aarch64::x4,
- vixl::aarch64::x5,
- vixl::aarch64::x6,
- vixl::aarch64::x7 };
+static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = {
+ vixl::aarch64::x0,
+ vixl::aarch64::x1,
+ vixl::aarch64::x2,
+ vixl::aarch64::x3,
+ vixl::aarch64::x4,
+ vixl::aarch64::x5,
+ vixl::aarch64::x6,
+ vixl::aarch64::x7
+};
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] =
- { vixl::aarch64::d0,
- vixl::aarch64::d1,
- vixl::aarch64::d2,
- vixl::aarch64::d3,
- vixl::aarch64::d4,
- vixl::aarch64::d5,
- vixl::aarch64::d6,
- vixl::aarch64::d7 };
+static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = {
+ vixl::aarch64::d0,
+ vixl::aarch64::d1,
+ vixl::aarch64::d2,
+ vixl::aarch64::d3,
+ vixl::aarch64::d4,
+ vixl::aarch64::d5,
+ vixl::aarch64::d6,
+ vixl::aarch64::d7
+};
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
vixl::aarch64::VRegister> {
public:
- static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-
InvokeRuntimeCallingConvention()
: CallingConvention(kRuntimeParameterCoreRegisters,
kRuntimeParameterCoreRegistersLength,
@@ -304,16 +304,16 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
Location GetFieldIndexLocation() const override {
return helpers::LocationFrom(vixl::aarch64::x0);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
return helpers::LocationFrom(vixl::aarch64::x0);
}
- Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
+ Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
bool is_instance) const override {
return is_instance
? helpers::LocationFrom(vixl::aarch64::x2)
: helpers::LocationFrom(vixl::aarch64::x1);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
return helpers::LocationFrom(vixl::aarch64::d0);
}
@@ -551,12 +551,31 @@ class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
// register size (full SIMD register is used).
void ValidateVectorLength(HVecOperation* instr) const;
- // Returns default predicate register which is used as governing vector predicate
- // to implement predicated loop execution.
+ vixl::aarch64::PRegister GetVecGoverningPReg(HVecOperation* instr) {
+ return GetVecPredSetFixedOutPReg(instr->GetGoverningPredicate());
+ }
+
+ // Returns a fixed p-reg for predicate setting instruction.
+ //
+ // Currently we only support diamond CF loops for predicated vectorization; also we don't have
+ // register allocator support for vector predicates. Thus we use fixed P-regs for loop main,
+ // True and False predicates as a temporary solution.
//
- // TODO: This is a hack to be addressed when register allocator supports SIMD types.
- static vixl::aarch64::PRegister LoopPReg() {
- return vixl::aarch64::p0;
+ // TODO: Support SIMD types and registers in ART.
+ static vixl::aarch64::PRegister GetVecPredSetFixedOutPReg(HVecPredSetOperation* instr) {
+ if (instr->IsVecPredWhile() || instr->IsVecPredSetAll()) {
+ // VecPredWhile and VecPredSetAll live ranges never overlap due to the current vectorization
+      // scheme: the former is only live inside a vectorized loop and the latter is never in a
+ // loop and never spans across loops.
+ return vixl::aarch64::p0;
+ } else if (instr->IsVecPredNot()) {
+ // This relies on the fact that we only use PredNot manually in the autovectorizer,
+ // so there is only one of them in each loop.
+ return vixl::aarch64::p1;
+ } else {
+ DCHECK(instr->IsVecCondition());
+ return vixl::aarch64::p2;
+ }
}
};
@@ -698,7 +717,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
return jump_tables_.back().get();
}
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
// Code generation helpers.
void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
@@ -737,9 +756,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; }
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
- return false;
- }
+ bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
@@ -838,13 +855,21 @@ class CodeGeneratorARM64 : public CodeGenerator {
// the associated patch for AOT or slow path for JIT.
void EmitBakerReadBarrierCbnz(uint32_t custom_data);
- vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address) {
+ return jit_patches_.DeduplicateBootImageAddressLiteral(address);
+ }
vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
- Handle<mirror::String> handle);
+ Handle<mirror::String> handle) {
+ return jit_patches_.DeduplicateJitStringLiteral(
+ dex_file, string_index, handle, GetCodeGenerationData());
+ }
vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file,
- dex::TypeIndex string_index,
- Handle<mirror::Class> handle);
+ dex::TypeIndex class_index,
+ Handle<mirror::Class> handle) {
+ return jit_patches_.DeduplicateJitClassLiteral(
+ dex_file, class_index, handle, GetCodeGenerationData());
+ }
void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -1074,18 +1099,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
uint32_t encoded_data,
/*out*/ std::string* debug_name);
- using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
- using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
- using StringToLiteralMap = ArenaSafeMap<StringReference,
- vixl::aarch64::Literal<uint32_t>*,
- StringReferenceValueComparator>;
- using TypeToLiteralMap = ArenaSafeMap<TypeReference,
- vixl::aarch64::Literal<uint32_t>*,
- TypeReferenceValueComparator>;
-
- vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
- vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
-
// The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
// whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> {
@@ -1158,14 +1171,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
- // Deduplication map for 32-bit literals, used for JIT for boot image addresses.
- Uint32ToLiteralMap uint32_literals_;
- // Deduplication map for 64-bit literals, used for JIT for method address or method code.
- Uint64ToLiteralMap uint64_literals_;
- // Patches for string literals in JIT compiled code.
- StringToLiteralMap jit_string_patches_;
- // Patches for class literals in JIT compiled code.
- TypeToLiteralMap jit_class_patches_;
+ JitPatchesARM64 jit_patches_;
// Baker read barrier slow paths, mapping custom data (uint32_t) to label.
// Wrap the label to work around vixl::aarch64::Label being non-copyable
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d69e77045b..78bf316c17 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -33,6 +33,7 @@
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
@@ -40,6 +41,7 @@
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
+#include "trace.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
@@ -1102,27 +1104,27 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
}
// Saves the register in the stack. Returns the size taken on stack.
-size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
// Restores the register from the stack. Returns the size taken on stack.
-size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
-size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
-size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
@@ -1908,6 +1910,7 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -1922,15 +1925,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -2024,7 +2025,7 @@ void CodeGeneratorARMVIXL::FixJumpTables() {
#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
-void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorARMVIXL::Finalize() {
FixJumpTables();
// Emit JIT baker read barrier slow paths.
@@ -2037,11 +2038,11 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
}
GetAssembler()->FinalizeCode();
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
// Verify Baker read barrier linker patches.
if (kIsDebugBuild) {
- ArrayRef<const uint8_t> code = allocator->GetMemory();
+ ArrayRef<const uint8_t> code(GetCode());
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
DCHECK(info.label.IsBound());
uint32_t literal_offset = info.label.GetLocation();
@@ -2188,11 +2189,16 @@ void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook)
LocationSummary* locations = new (GetGraph()->GetAllocator())
LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
+ // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
+ // compute the address to store the timestamp counter.
+ locations->AddRegisterTemps(3);
}
void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register value = RegisterFrom(locations->GetTemp(1));
+ vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
SlowPathCodeARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
@@ -2204,20 +2210,61 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction*
// if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
// disabled in debuggable runtime. The other bit is used when this method itself requires a
// deoptimization due to redefinition. So it is safe to just check for non-zero value here.
- GetAssembler()->LoadFromOffset(kLoadWord,
- temp,
- sp,
- codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
- __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ GetAssembler()->LoadFromOffset(
+ kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+ __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
}
MemberOffset offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
- __ Mov(temp, address + offset.Int32Value());
- __ Ldrb(temp, MemOperand(temp, 0));
- __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ __ Mov(addr, address + offset.Int32Value());
+ __ Ldrb(value, MemOperand(addr, 0));
+ __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
+  // Check if there are any trace method entry / exit listeners. If not, continue.
+ __ B(lt, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ __ B(gt, slow_path->GetEntryLabel());
+
+  // Check if there is space in the buffer to store a new entry; if not, take the slow path.
+ uint32_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kArmPointerSize>().Int32Value();
+ vixl32::Register index = value;
+ __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
+ __ Subs(index, index, kNumEntriesForWallClock);
+ __ B(lt, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ Str(index, MemOperand(tr, trace_buffer_index_offset));
+ // Calculate the entry address in the buffer.
+ // addr = base_addr + sizeof(void*) * index
+ __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
+ __ Add(addr, addr, Operand(index, LSL, TIMES_4));
+
+ // Record method pointer and trace action.
+ __ Ldr(tmp, MemOperand(sp, 0));
+ // Use last two bits to encode trace method action. For MethodEntry it is 0
+ // so no need to set the bits since they are 0 already.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
+
+ vixl32::Register tmp1 = index;
+ // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
+ __ Mrrc(/* lower 32-bit */ tmp,
+ /* higher 32-bit */ tmp1,
+ /* coproc= */ 15,
+ /* opc1= */ 1,
+ /* crm= */ 14);
+ static_assert(kHighTimestampOffsetInBytes ==
+ kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
+ __ Strd(tmp, tmp1, MemOperand(addr, kTimestampOffsetInBytes));
__ Bind(slow_path->GetExitLabel());
}
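A minimal sketch of the record that the fast path above writes into the per-thread trace buffer (not part of the change itself; the struct name is invented, and it assumes a 32-bit runtime pointer size and that kNumEntriesForWallClock covers exactly the three words stored here):

#include <cstdint>

// Illustrative only; the field order is an assumption, the contents mirror the Str/Strd stores above.
struct TraceRecordSketch {
  uint32_t method_and_action;  // ArtMethod* with kTraceMethodExit (1) OR-ed into its low bits on exit
  uint32_t timestamp_lo;       // lower word of the 64-bit cycle counter read via MRRC
  uint32_t timestamp_hi;       // upper word, stored right after the lower word by Strd
};
static_assert(sizeof(TraceRecordSketch) == 3 * sizeof(uint32_t), "three 32-bit slots per record");

The index check above also shows the buffer being consumed from its end toward the base: the per-thread index is decremented by kNumEntriesForWallClock slots and the slow path is taken once it would go negative.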
@@ -2228,7 +2275,11 @@ void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instr
}
void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
- new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
+ // compute the address to store the timestamp counter.
+ locations->AddRegisterTemps(3);
}
void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
@@ -2824,8 +2875,7 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target,
@@ -3422,7 +3472,7 @@ void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -3432,7 +3482,7 @@ void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -3442,7 +3492,7 @@ void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -3453,7 +3503,7 @@ void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
}
void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
- HFloatConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -3464,7 +3514,7 @@ void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
}
void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
- HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -3473,7 +3523,7 @@ void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* construct
}
void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -3489,7 +3539,7 @@ void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
codegen_->GenerateFrameExit();
}
@@ -5617,7 +5667,7 @@ void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction)
}
void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HParameterValue* instruction) {
// Nothing to do, the parameter is already at its location.
}
@@ -5628,7 +5678,7 @@ void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
}
void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
- HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HCurrentMethod* instruction) {
// Nothing to do, the method is already at its location.
}
@@ -5769,7 +5819,7 @@ void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -6104,8 +6154,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu
Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
Opcode opcode) {
DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
- if (constant->IsConstant() &&
- CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
+ if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -7234,7 +7283,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
}
}
-void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -7604,7 +7653,9 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
- if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
@@ -7631,9 +7682,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(cls);
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption();
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -7887,7 +7937,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
codegen_->EmitMovwMovtPlaceholder(labels, out);
// All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/0, GetCompilerReadBarrierOption());
LoadStringSlowPathARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
codegen_->AddSlowPath(slow_path);
@@ -7908,14 +7958,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
load->GetString()));
// /* GcRoot<mirror::String> */ out = *out
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/0, GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
__ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
@@ -7944,7 +7993,7 @@ void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
__ Mov(temp, 0);
@@ -8490,12 +8539,11 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- maybe_temp2_loc,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
// Loop through the iftable and check if any class matches.
@@ -9828,7 +9876,7 @@ void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
assembler.FinalizeCode();
code->resize(assembler.CodeSize());
MemoryRegion code_region(code->data(), code->size());
- assembler.FinalizeInstructions(code_region);
+ assembler.CopyInstructions(code_region);
}
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
@@ -9867,12 +9915,12 @@ void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulat
}
}
-void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index f5abe6951a..0175448fde 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -620,7 +620,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
block_labels_.resize(GetGraph()->GetBlocks().size());
}
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
bool NeedsTwoRegisters(DataType::Type type) const override {
return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64;
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
new file mode 100644
index 0000000000..7f23730143
--- /dev/null
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -0,0 +1,6494 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_riscv64.h"
+
+#include "android-base/logging.h"
+#include "android-base/macros.h"
+#include "arch/riscv64/jni_frame_riscv64.h"
+#include "arch/riscv64/registers_riscv64.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "code_generator_utils.h"
+#include "dwarf/register.h"
+#include "heap_poisoning.h"
+#include "intrinsics_list.h"
+#include "intrinsics_riscv64.h"
+#include "jit/profiling_info.h"
+#include "linker/linker_patch.h"
+#include "mirror/class-inl.h"
+#include "optimizing/nodes.h"
+#include "stack_map_stream.h"
+#include "utils/label.h"
+#include "utils/riscv64/assembler_riscv64.h"
+#include "utils/stack_checks.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+// Placeholder values embedded in instructions, patched at link time.
+constexpr uint32_t kLinkTimeOffsetPlaceholderHigh = 0x12345;
+constexpr uint32_t kLinkTimeOffsetPlaceholderLow = 0x678;
+
+// Compare-and-jump packed switch generates approx. 3 + 1.5 * N 32-bit
+// instructions for N cases.
+// Table-based packed switch generates approx. 10 32-bit instructions
+// and N 32-bit data words for N cases.
+// We switch to the table-based method starting with 6 entries.
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6;
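A quick constexpr sanity check of the crossover arithmetic quoted above (the helper names are illustrative, not part of the ART sources):

#include <cstdint>

// Approximate costs from the comment above, in 32-bit words.
constexpr uint32_t CompareJumpInsns(uint32_t n) { return 3u + (3u * n) / 2u; }  // ~3 + 1.5 * N insns
constexpr uint32_t TableInsns() { return 10u; }                                 // ~10 insns (+ N data words)
// At the chosen threshold of 6 entries, the compare-and-jump form already needs ~12 instructions,
// while the table-based form stays at ~10 instructions regardless of the number of cases.
static_assert(CompareJumpInsns(6) == 12u);
static_assert(CompareJumpInsns(6) > TableInsns());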
+
+// FCLASS returns a 10-bit classification mask with the two highest bits marking NaNs
+// (signaling and quiet). To detect a NaN, we can compare (either BGE or BGEU, the sign
+// bit is always clear) the result with the `kFClassNaNMinValue`.
+static_assert(kSignalingNaN == 0x100);
+static_assert(kQuietNaN == 0x200);
+static constexpr int32_t kFClassNaNMinValue = 0x100;
+
+static constexpr XRegister kCoreCalleeSaves[] = {
+ // S1(TR) is excluded as the ART thread register.
+ S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA
+};
+
+static constexpr FRegister kFpuCalleeSaves[] = {
+ FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11
+};
+
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, x).Int32Value()
+
+Location RegisterOrZeroBitPatternLocation(HInstruction* instruction) {
+ return IsZeroBitPattern(instruction)
+ ? Location::ConstantLocation(instruction)
+ : Location::RequiresRegister();
+}
+
+XRegister InputXRegisterOrZero(Location location) {
+ if (location.IsConstant()) {
+ DCHECK(location.GetConstant()->IsZeroBitPattern());
+ return Zero;
+ } else {
+ return location.AsRegister<XRegister>();
+ }
+}
+
+Location ValueLocationForStore(HInstruction* value) {
+ if (IsZeroBitPattern(value)) {
+ return Location::ConstantLocation(value);
+ } else if (DataType::IsFloatingPointType(value->GetType())) {
+ return Location::RequiresFpuRegister();
+ } else {
+ return Location::RequiresRegister();
+ }
+}
+
+Location Riscv64ReturnLocation(DataType::Type return_type) {
+ switch (return_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ case DataType::Type::kUint64:
+ case DataType::Type::kInt64:
+ return Location::RegisterLocation(A0);
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ return Location::FpuRegisterLocation(FA0);
+
+ case DataType::Type::kVoid:
+ return Location::NoLocation();
+ }
+ UNREACHABLE();
+}
+
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ DCHECK_EQ(
+ calling_convention.GetRegisterAt(0),
+ calling_convention.GetReturnLocation(DataType::Type::kReference).AsRegister<XRegister>());
+ return caller_saves;
+}
+
+template <ClassStatus kStatus>
+static constexpr int64_t ShiftedSignExtendedClassStatusValue() {
+ // This is used only for status values that have the highest bit set.
+ static_assert(CLZ(enum_cast<uint32_t>(kStatus)) == status_lsb_position);
+ constexpr uint32_t kShiftedStatusValue = enum_cast<uint32_t>(kStatus) << status_lsb_position;
+ static_assert(kShiftedStatusValue >= 0x80000000u);
+ return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32);
+}
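A small standalone example of the sign-extension trick used above (the sample value 0xF0000000 is arbitrary and only stands in for a shifted status with its top bit set):

#include <cstdint>

// Subtracting 2^32 turns the unsigned 32-bit pattern into the value that a sign-extending
// 32-bit load (e.g. LW on RISC-V) would produce when reading that word into a 64-bit register.
constexpr int64_t SignExtendShiftedStatus(uint32_t shifted) {
  return static_cast<int64_t>(shifted) - (INT64_C(1) << 32);
}
static_assert(SignExtendShiftedStatus(0xF0000000u) == INT64_C(-0x10000000));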
+
+int32_t ReadBarrierMarkEntrypointOffset(Location ref) {
+ DCHECK(ref.IsRegister());
+ int reg = ref.reg();
+ DCHECK(T0 <= reg && reg <= T6 && reg != TR) << reg;
+ // Note: Entrypoints for registers X30 (T5) and X31 (T6) are stored in entries
+ // for X0 (Zero) and X1 (RA) because these are not valid registers for marking
+ // and we currently have slots only up to register 29.
+ int entry_point_number = (reg >= 30) ? reg - 30 : reg;
+ return Thread::ReadBarrierMarkEntryPointsOffset<kRiscv64PointerSize>(entry_point_number);
+}
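A tiny check of the register-to-slot mapping described in the comment above (the helper is a hypothetical stand-in; 30 and 31 are the register encodings of T5 and T6):

// Mirrors the `(reg >= 30) ? reg - 30 : reg` computation above.
constexpr int EntryPointNumberSketch(int reg) { return (reg >= 30) ? reg - 30 : reg; }
static_assert(EntryPointNumberSketch(30) == 0);  // T5 reuses the slot of X0 (Zero).
static_assert(EntryPointNumberSketch(31) == 1);  // T6 reuses the slot of X1 (RA).
static_assert(EntryPointNumberSketch(7) == 7);   // Other marking registers keep their own index.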
+
+Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
+ return Riscv64ReturnLocation(return_type);
+}
+
+Location InvokeDexCallingConventionVisitorRISCV64::GetReturnLocation(DataType::Type type) const {
+ return Riscv64ReturnLocation(type);
+}
+
+Location InvokeDexCallingConventionVisitorRISCV64::GetMethodLocation() const {
+ return Location::RegisterLocation(kArtMethodRegister);
+}
+
+Location InvokeDexCallingConventionVisitorRISCV64::GetNextLocation(DataType::Type type) {
+ Location next_location;
+ if (type == DataType::Type::kVoid) {
+ LOG(FATAL) << "Unexpected parameter type " << type;
+ }
+
+ // Note: Unlike the RISC-V C/C++ calling convention, managed ABI does not use
+ // GPRs to pass FP args when we run out of FPRs.
+ if (DataType::IsFloatingPointType(type) &&
+ float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
+ next_location =
+ Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(float_index_++));
+ } else if (!DataType::IsFloatingPointType(type) &&
+ (gp_index_ < calling_convention.GetNumberOfRegisters())) {
+ next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++));
+ } else {
+ size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
+ next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) :
+ Location::StackSlot(stack_offset);
+ }
+
+ // Space on the stack is reserved for all arguments.
+ stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
+
+ return next_location;
+}
+
+Location CriticalNativeCallingConventionVisitorRiscv64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFpuRegistersLength) {
+ location = Location::FpuRegisterLocation(kParameterFpuRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+    // Native ABI allows passing excess FP args in GPRs. This is facilitated by
+ // inserting fake conversion intrinsic calls (`Double.doubleToRawLongBits()`
+ // or `Float.floatToRawIntBits()`) by `CriticalNativeAbiFixupRiscv64`.
+ // TODO(riscv64): Implement these intrinsics and `CriticalNativeAbiFixupRiscv64`.
+ } else {
+ // Native ABI uses the same core registers as a runtime call.
+ if (gpr_index_ < kRuntimeParameterCoreRegistersLength) {
+ location = Location::RegisterLocation(kRuntimeParameterCoreRegisters[gpr_index_]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorRiscv64::GetReturnLocation(
+ DataType::Type type) const {
+ // The result is returned the same way in native ABI and managed ABI. No result conversion is
+ // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`.
+ InvokeDexCallingConventionVisitorRISCV64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorRiscv64::GetMethodLocation() const {
+ // Pass the method in the hidden argument T0.
+ return Location::RegisterLocation(T0);
+}
+
+#define __ down_cast<CodeGeneratorRISCV64*>(codegen)->GetAssembler()-> // NOLINT
+
+void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) {
+ InvokeDexCallingConventionVisitorRISCV64 calling_convention_visitor;
+ CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor);
+}
+
+class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ CompileOptimizedSlowPathRISCV64() : SlowPathCodeRISCV64(/*instruction=*/ nullptr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ uint32_t entrypoint_offset =
+ GetThreadOffset<kRiscv64PointerSize>(kQuickCompileOptimized).Int32Value();
+ __ Bind(GetEntryLabel());
+ __ Loadd(RA, TR, entrypoint_offset);
+ // Note: we don't record the call here (and therefore don't generate a stack
+ // map), as the entrypoint should never be suspended.
+ __ Jalr(RA);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "CompileOptimizedSlowPath"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathRISCV64);
+};
+
+class SuspendCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ SuspendCheckSlowPathRISCV64(HSuspendCheck* instruction, HBasicBlock* successor)
+ : SlowPathCodeRISCV64(instruction), successor_(successor) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD.
+ riscv64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD.
+ if (successor_ == nullptr) {
+ __ J(GetReturnLabel());
+ } else {
+ __ J(riscv64_codegen->GetLabelOf(successor_));
+ }
+ }
+
+ Riscv64Label* GetReturnLabel() {
+ DCHECK(successor_ == nullptr);
+ return &return_label_;
+ }
+
+ const char* GetDescription() const override { return "SuspendCheckSlowPathRISCV64"; }
+
+ HBasicBlock* GetSuccessor() const { return successor_; }
+
+ private:
+ // If not null, the block to branch to after the suspend check.
+ HBasicBlock* const successor_;
+
+ // If `successor_` is null, the label to branch to after the suspend check.
+ Riscv64Label return_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathRISCV64);
+};
+
+class NullCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit NullCheckSlowPathRISCV64(HNullCheck* instr) : SlowPathCodeRISCV64(instr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ if (instruction_->CanThrowIntoCatchBlock()) {
+ // Live registers will be restored in the catch block if caught.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
+ riscv64_codegen->InvokeRuntime(
+ kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
+ }
+
+ bool IsFatal() const override { return true; }
+
+ const char* GetDescription() const override { return "NullCheckSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathRISCV64);
+};
+
+class BoundsCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit BoundsCheckSlowPathRISCV64(HBoundsCheck* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ if (instruction_->CanThrowIntoCatchBlock()) {
+ // Live registers will be restored in the catch block if caught.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ DataType::Type::kInt32,
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ DataType::Type::kInt32);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ?
+ kQuickThrowStringBounds :
+ kQuickThrowArrayBounds;
+ riscv64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+ CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
+ }
+
+ bool IsFatal() const override { return true; }
+
+ const char* GetDescription() const override { return "BoundsCheckSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathRISCV64);
+};
+
+class LoadClassSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ LoadClassSlowPathRISCV64(HLoadClass* cls, HInstruction* at) : SlowPathCodeRISCV64(at), cls_(cls) {
+ DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), riscv64_codegen->GetGraph()->GetDexFile()) ||
+ riscv64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
+ ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
+ &cls_->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
+ if (cls_->NeedsAccessCheck()) {
+ CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
+ riscv64_codegen->InvokeRuntime(
+ kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
+ } else {
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ riscv64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ }
+ // If we also must_do_clinit, the resolved type is now in the correct register.
+ } else {
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ riscv64_codegen->MoveLocation(
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source, cls_->GetType());
+ }
+ if (must_do_clinit) {
+ riscv64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
+ }
+
+ // Move the class to the desired location.
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ DataType::Type type = DataType::Type::kReference;
+ DCHECK_EQ(type, instruction_->GetType());
+ riscv64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+ }
+ RestoreLiveRegisters(codegen, locations);
+
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "LoadClassSlowPathRISCV64"; }
+
+ private:
+ // The class this slow path will load.
+ HLoadClass* const cls_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathRISCV64);
+};
+
+class DeoptimizationSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit DeoptimizationSlowPathRISCV64(HDeoptimize* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
+ riscv64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
+ }
+
+ const char* GetDescription() const override { return "DeoptimizationSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathRISCV64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ ReadBarrierForRootSlowPathRISCV64(HInstruction* instruction, Location out, Location root)
+ : SlowPathCodeRISCV64(instruction), out_(out), root_(root) {
+ DCHECK(gUseReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ DataType::Type type = DataType::Type::kReference;
+ XRegister reg_out = out_.AsRegister<XRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ riscv64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ root_,
+ DataType::Type::kReference);
+ riscv64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ riscv64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathRISCV64"; }
+
+ private:
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathRISCV64);
+};
+
+class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
+ parallel_move.AddMove(
+ locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ DataType::Type::kReference,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ DataType::Type::kInt32,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(2),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ DataType::Type::kReference,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ riscv64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "ArraySetSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathRISCV64);
+};
+
+class TypeCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit TypeCheckSlowPathRISCV64(HInstruction* instruction, bool is_fatal)
+ : SlowPathCodeRISCV64(instruction), is_fatal_(is_fatal) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+
+ uint32_t dex_pc = instruction_->GetDexPc();
+ DCHECK(instruction_->IsCheckCast()
+ || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+
+ __ Bind(GetEntryLabel());
+ if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
+ SaveLiveRegisters(codegen, locations);
+ }
+
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ DataType::Type::kReference,
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ DataType::Type::kReference);
+ if (instruction_->IsInstanceOf()) {
+ riscv64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
+ DataType::Type ret_type = instruction_->GetType();
+ Location ret_loc = calling_convention.GetReturnLocation(ret_type);
+ riscv64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+ } else {
+ DCHECK(instruction_->IsCheckCast());
+ riscv64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
+ }
+
+ if (!is_fatal_) {
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+ }
+
+ const char* GetDescription() const override { return "TypeCheckSlowPathRISCV64"; }
+
+ bool IsFatal() const override { return is_fatal_; }
+
+ private:
+ const bool is_fatal_;
+
+ DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathRISCV64);
+};
+
+class DivZeroCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit DivZeroCheckSlowPathRISCV64(HDivZeroCheck* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ riscv64_codegen->InvokeRuntime(
+ kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
+ }
+
+ bool IsFatal() const override { return true; }
+
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathRISCV64);
+};
+
+class ReadBarrierMarkSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ ReadBarrierMarkSlowPathRISCV64(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeRISCV64(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(gUseReadBarrier);
+ DCHECK(entrypoint.IsRegister());
+ }
+
+ const char* GetDescription() const override { return "ReadBarrierMarkSlowPathRISCV64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ XRegister ref_reg = ref_.AsRegister<XRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsPredicatedInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ DCHECK(ref_reg >= T0 && ref_reg != TR);
+
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ riscv64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ DCHECK_NE(entrypoint_.AsRegister<XRegister>(), TMP); // A taken branch can clobber `TMP`.
+ __ Jalr(entrypoint_.AsRegister<XRegister>()); // Clobbers `RA` (used as the `entrypoint_`).
+ __ J(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the already loaded entrypoint.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathRISCV64);
+};
+
+class LoadStringSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit LoadStringSlowPathRISCV64(HLoadString* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(instruction_->IsLoadString());
+ DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
+ riscv64_codegen->InvokeRuntime(
+ kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+ DataType::Type type = DataType::Type::kReference;
+ DCHECK_EQ(type, instruction_->GetType());
+ riscv64_codegen->MoveLocation(
+ locations->Out(), calling_convention.GetReturnLocation(type), type);
+ RestoreLiveRegisters(codegen, locations);
+
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "LoadStringSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathRISCV64);
+};
+
+#undef __
+#define __ down_cast<Riscv64Assembler*>(GetAssembler())-> // NOLINT
+
+template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
+inline void InstructionCodeGeneratorRISCV64::FpBinOp(
+ Reg rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler();
+ if (type == DataType::Type::kFloat32) {
+ (assembler->*opS)(rd, rs1, rs2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat64);
+ (assembler->*opD)(rd, rs1, rs2);
+ }
+}
+
+inline void InstructionCodeGeneratorRISCV64::FAdd(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FSub(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FSubS, &Riscv64Assembler::FSubD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FDiv(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMul(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMin(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMax(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FEq(
+ XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FLt(
+ XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<XRegister, &Riscv64Assembler::FLtS, &Riscv64Assembler::FLtD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FLe(
+ XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type);
+}
+
+template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister)>
+inline void InstructionCodeGeneratorRISCV64::FpUnOp(
+ Reg rd, FRegister rs1, DataType::Type type) {
+ Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler();
+ if (type == DataType::Type::kFloat32) {
+ (assembler->*opS)(rd, rs1);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat64);
+ (assembler->*opD)(rd, rs1);
+ }
+}
+
+inline void InstructionCodeGeneratorRISCV64::FAbs(
+ FRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FNeg(
+ FRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMv(
+ FRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FClass(
+ XRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type);
+}
+
+void InstructionCodeGeneratorRISCV64::Load(
+ Location out, XRegister rs1, int32_t offset, DataType::Type type) {
+ switch (type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ __ Loadbu(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt8:
+ __ Loadb(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kUint16:
+ __ Loadhu(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt16:
+ __ Loadh(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt32:
+ __ Loadw(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt64:
+ __ Loadd(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kReference:
+ __ Loadwu(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kFloat32:
+ __ FLoadw(out.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kFloat64:
+ __ FLoadd(out.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::Store(
+ Location value, XRegister rs1, int32_t offset, DataType::Type type) {
+ DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant()));
+ if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) {
+ riscv64::ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Mv(tmp, value.AsRegister<XRegister>());
+ codegen_->PoisonHeapReference(tmp);
+ __ Storew(tmp, rs1, offset);
+ return;
+ }
+ switch (type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ __ Storeb(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ __ Storeh(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kFloat32:
+ if (!value.IsConstant()) {
+ __ FStorew(value.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ __ Storew(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kFloat64:
+ if (!value.IsConstant()) {
+ __ FStored(value.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kInt64:
+ __ Stored(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
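+// Computes `rd = rs2 + (rs1 << DataType::SizeShift(type))` using a Zba shift-and-add
+// instruction (SH1ADD/SH2ADD/SH3ADD), or a plain ADD when the size shift is zero.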
+void InstructionCodeGeneratorRISCV64::ShNAdd(
+ XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type) {
+ switch (type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(DataType::SizeShift(type), 0u);
+ __ Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(DataType::SizeShift(type), 1u);
+ __ Sh1Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(DataType::SizeShift(type), 2u);
+ __ Sh2Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(DataType::SizeShift(type), 3u);
+ __ Sh3Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const {
+ return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverRISCV64::EmitMove(size_t index) {
+ MoveOperands* move = moves_[index];
+ codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverRISCV64::EmitSwap(size_t index) {
+ MoveOperands* move = moves_[index];
+ codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverRISCV64::SpillScratch([[maybe_unused]] int reg) {
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void ParallelMoveResolverRISCV64::RestoreScratch([[maybe_unused]] int reg) {
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void ParallelMoveResolverRISCV64::Exchange(int index1, int index2, bool double_slot) {
+ // We have 2 scratch X registers and 1 scratch F register that we can use. We prefer
+ // to use X registers for the swap but if both offsets are too big, we need to reserve
+ // one of the X registers for address adjustment and use an F register.
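+  // For example (illustrative offsets): swapping slots at offsets 4096 and 8192, neither of
+  // which fits a signed 12-bit immediate, uses an F register as the second temporary so that
+  // one X scratch register remains free for address adjustment inside `MoveLocation()`.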
+ bool use_fp_tmp2 = false;
+ if (!IsInt<12>(index2)) {
+ if (!IsInt<12>(index1)) {
+ use_fp_tmp2 = true;
+ } else {
+ std::swap(index1, index2);
+ }
+ }
+ DCHECK_IMPLIES(!IsInt<12>(index2), use_fp_tmp2);
+
+ Location loc1(double_slot ? Location::DoubleStackSlot(index1) : Location::StackSlot(index1));
+ Location loc2(double_slot ? Location::DoubleStackSlot(index2) : Location::StackSlot(index2));
+ riscv64::ScratchRegisterScope srs(GetAssembler());
+ Location tmp = Location::RegisterLocation(srs.AllocateXRegister());
+ DataType::Type tmp_type = double_slot ? DataType::Type::kInt64 : DataType::Type::kInt32;
+ Location tmp2 = use_fp_tmp2
+ ? Location::FpuRegisterLocation(srs.AllocateFRegister())
+ : Location::RegisterLocation(srs.AllocateXRegister());
+ DataType::Type tmp2_type = use_fp_tmp2
+ ? (double_slot ? DataType::Type::kFloat64 : DataType::Type::kFloat32)
+ : tmp_type;
+
+ codegen_->MoveLocation(tmp, loc1, tmp_type);
+ codegen_->MoveLocation(tmp2, loc2, tmp2_type);
+ if (use_fp_tmp2) {
+ codegen_->MoveLocation(loc2, tmp, tmp_type);
+ } else {
+ // We cannot use `Stored()` or `Storew()` via `MoveLocation()` because we have
+ // no more scratch registers available. Use `Sd()` or `Sw()` explicitly.
+ DCHECK(IsInt<12>(index2));
+ if (double_slot) {
+ __ Sd(tmp.AsRegister<XRegister>(), SP, index2);
+ } else {
+ __ Sw(tmp.AsRegister<XRegister>(), SP, index2);
+ }
+ srs.FreeXRegister(tmp.AsRegister<XRegister>()); // Free a temporary for `MoveLocation()`.
+ }
+ codegen_->MoveLocation(loc1, tmp2, tmp2_type);
+}
+
+InstructionCodeGeneratorRISCV64::InstructionCodeGeneratorRISCV64(HGraph* graph,
+ CodeGeneratorRISCV64* codegen)
+ : InstructionCodeGenerator(graph, codegen),
+ assembler_(codegen->GetAssembler()),
+ codegen_(codegen) {}
+
+void InstructionCodeGeneratorRISCV64::GenerateClassInitializationCheck(
+ SlowPathCodeRISCV64* slow_path, XRegister class_reg) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+
+ // We shall load the full 32-bit status word with sign-extension and compare as unsigned
+ // to a sign-extended shifted status value. This yields the same comparison as loading and
+ // materializing unsigned but the constant is materialized with a single LUI instruction.
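+  // Sign-extension is monotonic with respect to the unsigned order of 32-bit values, so the
+  // unsigned BLTU below gives the same result as it would on zero-extended values, while LUI
+  // (which sign-extends its result on RV64) can materialize the shifted status in one instruction.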
+ __ Loadw(tmp, class_reg, mirror::Class::StatusOffset().SizeValue()); // Sign-extended.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>());
+ __ Bltu(tmp, tmp2, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* instruction, XRegister temp) {
+ UNUSED(instruction);
+ UNUSED(temp);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateSuspendCheck(HSuspendCheck* instruction,
+ HBasicBlock* successor) {
+ if (instruction->IsNoOp()) {
+ if (successor != nullptr) {
+ __ J(codegen_->GetLabelOf(successor));
+ }
+ return;
+ }
+
+ if (codegen_->CanUseImplicitSuspendCheck()) {
+ LOG(FATAL) << "Unimplemented ImplicitSuspendCheck";
+ return;
+ }
+
+ SuspendCheckSlowPathRISCV64* slow_path =
+ down_cast<SuspendCheckSlowPathRISCV64*>(instruction->GetSlowPath());
+
+ if (slow_path == nullptr) {
+ slow_path =
+ new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathRISCV64(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value());
+ static_assert(Thread::SuspendOrCheckpointRequestFlags() != std::numeric_limits<uint32_t>::max());
+ static_assert(IsPowerOfTwo(Thread::SuspendOrCheckpointRequestFlags() + 1u));
+ // Shift out other bits. Use an instruction that can be 16-bit with the "C" Standard Extension.
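+  // The flag bits occupy the lowest bits (see the static_asserts above), so shifting left by
+  // CLZ(flags) moves exactly those bits into the most significant positions and discards all
+  // others; the branches below then test only the suspend/checkpoint flags.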
+ __ Slli(tmp, tmp, CLZ(static_cast<uint64_t>(Thread::SuspendOrCheckpointRequestFlags())));
+ if (successor == nullptr) {
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetReturnLabel());
+ } else {
+ __ Beqz(tmp, codegen_->GetLabelOf(successor));
+ __ J(slow_path->GetEntryLabel());
+ // slow_path will return to GetLabelOf(successor).
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister(
+ HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ XRegister out_reg = out.AsRegister<XRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(gUseReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check= */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Mv(maybe_temp.AsRegister<XRegister>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ Loadwu(out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ Loadwu(out_reg, out_reg, offset);
+ codegen_->MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters(
+ HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ XRegister out_reg = out.AsRegister<XRegister>();
+ XRegister obj_reg = obj.AsRegister<XRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(gUseReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check= */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ Loadwu(out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ Loadwu(out_reg, obj_reg, offset);
+ codegen_->MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ XRegister obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option,
+ Riscv64Label* label_low) {
+ DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset;
+ XRegister root_reg = root.AsRegister<XRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(gUseReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (T6) the read barrier mark entry point corresponding
+ // to register `root`. If `temp` is null, it means that `GetIsGcMarking()`
+ // is false, and vice versa.
+ //
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+ //
+ // TODO(riscv64): Introduce a "marking register" that holds the pointer to one of the
+ // register marking entrypoints if marking (null if not marking) and make sure that
+ // marking entrypoints for other registers are at known offsets, so that we can call
+ // them using the "marking register" plus the offset embedded in the JALR instruction.
+
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ Loadwu(root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway.
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(
+ instruction, root, Location::RegisterLocation(tmp));
+ codegen_->AddSlowPath(slow_path);
+
+ const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Loadd(tmp, TR, entry_point_offset);
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ __ AddConst32(root_reg, obj, offset);
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ __ Loadwu(root_reg, obj, offset);
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
+ Riscv64Label* true_target,
+ Riscv64Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
+ // Constant condition, statically compared against "true" (integer value 1).
+ if (cond->AsIntConstant()->IsTrue()) {
+ if (true_target != nullptr) {
+ __ J(true_target);
+ }
+ } else {
+ DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+ if (false_target != nullptr) {
+ __ J(false_target);
+ }
+ }
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ // The condition instruction has been materialized, compare the output to 0.
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+ DCHECK(cond_val.IsRegister());
+ if (true_target == nullptr) {
+ __ Beqz(cond_val.AsRegister<XRegister>(), false_target);
+ } else {
+ __ Bnez(cond_val.AsRegister<XRegister>(), true_target);
+ }
+ } else {
+ // The condition instruction has not been materialized, use its inputs as
+ // the comparison and its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+ DataType::Type type = condition->InputAt(0)->GetType();
+ LocationSummary* locations = condition->GetLocations();
+ IfCondition if_cond = condition->GetCondition();
+ Riscv64Label* branch_target = true_target;
+
+ if (true_target == nullptr) {
+ if_cond = condition->GetOppositeCondition();
+ branch_target = false_target;
+ }
+
+ switch (type) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateFpCondition(if_cond, condition->IsGtBias(), type, locations, branch_target);
+ break;
+ default:
+ // Integral types and reference equality.
+ GenerateIntLongCompareAndBranch(if_cond, locations, branch_target);
+ break;
+ }
+ }
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
+ __ J(false_target);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DataType::Type type = instruction->GetResultType();
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ Mv(out, Zero);
+ } else {
+ if (imm == -1) {
+ if (type == DataType::Type::kInt32) {
+ __ Subw(out, Zero, dividend);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ __ Sub(out, Zero, dividend);
+ }
+ } else if (out != dividend) {
+ __ Mv(out, dividend);
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DataType::Type type = instruction->GetResultType();
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type;
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ int64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
+ int ctz_imm = CTZ(abs_imm);
+ DCHECK_GE(ctz_imm, 1); // Division by +/-1 is handled by `DivRemOneOrMinusOne()`.
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ // Calculate the negative dividend adjustment `tmp = dividend < 0 ? abs_imm - 1 : 0`.
+ // This adjustment is needed for rounding the division result towards zero.
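+  // For example (illustrative values): for a 32-bit dividend of -7 and a divisor of +4
+  // (abs_imm = 4, ctz_imm = 2), the adjustment is 3 and (-7 + 3) >> 2 = -1 as required,
+  // whereas plain -7 >> 2 would yield the floor result -2.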
+ if (type == DataType::Type::kInt32 || ctz_imm == 1) {
+ // A 32-bit dividend is sign-extended to 64-bit, so we can use the upper bits.
+ // And for a 64-bit division by +/-2, we need just the sign bit.
+ DCHECK_IMPLIES(type == DataType::Type::kInt32, ctz_imm < 32);
+ __ Srli(tmp, dividend, 64 - ctz_imm);
+ } else {
+ // For other 64-bit divisions, we need to replicate the sign bit.
+ __ Srai(tmp, dividend, 63);
+ __ Srli(tmp, tmp, 64 - ctz_imm);
+ }
+ // The rest of the calculation can use 64-bit operations even for 32-bit div/rem.
+ __ Add(tmp, tmp, dividend);
+ if (instruction->IsDiv()) {
+ __ Srai(out, tmp, ctz_imm);
+ if (imm < 0) {
+ __ Neg(out, out);
+ }
+ } else {
+ if (ctz_imm <= 11) {
+ __ Andi(tmp, tmp, -abs_imm);
+ } else {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister tmp2 = srs2.AllocateXRegister();
+ __ Li(tmp2, -abs_imm);
+ __ And(tmp, tmp, tmp2);
+ }
+ __ Sub(out, dividend, tmp);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ Location second = locations->InAt(1);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DataType::Type type = instruction->GetResultType();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+
+ // TODO: optimize with constant.
+ __ LoadConst64(tmp, imm);
+ if (instruction->IsDiv()) {
+ if (type == DataType::Type::kInt32) {
+ __ Divw(out, dividend, tmp);
+ } else {
+ __ Div(out, dividend, tmp);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Remw(out, dividend, tmp);
+ } else {
+ __ Rem(out, dividend, tmp);
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DataType::Type type = instruction->GetResultType();
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type;
+
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ Location second = locations->InAt(1);
+
+ if (second.IsConstant()) {
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+ } else {
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ XRegister divisor = second.AsRegister<XRegister>();
+ if (instruction->IsDiv()) {
+ if (type == DataType::Type::kInt32) {
+ __ Divw(out, dividend, divisor);
+ } else {
+ __ Div(out, dividend, divisor);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Remw(out, dividend, divisor);
+ } else {
+ __ Rem(out, dividend, divisor);
+ }
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond,
+ LocationSummary* locations) {
+ XRegister rd = locations->Out().AsRegister<XRegister>();
+ XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
+ Location rs2_location = locations->InAt(1);
+ bool use_imm = rs2_location.IsConstant();
+ int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0;
+ XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>();
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ if (!use_imm) {
+ __ Sub(rd, rs1, rs2); // SUB is OK here even for 32-bit comparison.
+ } else if (imm != 0) {
+ DCHECK(IsInt<12>(-imm));
+ __ Addi(rd, rs1, -imm); // ADDI is OK here even for 32-bit comparison.
+ } // else test `rs1` directly without subtraction for `use_imm && imm == 0`.
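+      // E.g. (illustrative) `rs1 == 5` becomes `ADDI rd, rs1, -5; SEQZ rd, rd`.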
+ if (cond == kCondEQ) {
+ __ Seqz(rd, (use_imm && imm == 0) ? rs1 : rd);
+ } else {
+ __ Snez(rd, (use_imm && imm == 0) ? rs1 : rd);
+ }
+ break;
+
+ case kCondLT:
+ case kCondGE:
+ if (use_imm) {
+ DCHECK(IsInt<12>(imm));
+ __ Slti(rd, rs1, imm);
+ } else {
+ __ Slt(rd, rs1, rs2);
+ }
+ if (cond == kCondGE) {
+ // Calculate `rs1 >= rhs` as `!(rs1 < rhs)` since there's only the SLT but no SGE.
+ __ Xori(rd, rd, 1);
+ }
+ break;
+
+ case kCondLE:
+ case kCondGT:
+ if (use_imm) {
+ // Calculate `rs1 <= imm` as `rs1 < imm + 1`.
+ DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check.
+ __ Slti(rd, rs1, imm + 1);
+ } else {
+ __ Slt(rd, rs2, rs1);
+ }
+ if ((cond == kCondGT) == use_imm) {
+ // Calculate `rs1 > imm` as `!(rs1 < imm + 1)` and calculate
+ // `rs1 <= rs2` as `!(rs2 < rs1)` since there's only the SLT but no SGE.
+ __ Xori(rd, rd, 1);
+ }
+ break;
+
+ case kCondB:
+ case kCondAE:
+ if (use_imm) {
+ // Sltiu sign-extends its 12-bit immediate operand before the comparison
+ // and thus lets us compare directly with unsigned values in the ranges
+ // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff].
+ DCHECK(IsInt<12>(imm));
+ __ Sltiu(rd, rs1, imm);
+ } else {
+ __ Sltu(rd, rs1, rs2);
+ }
+ if (cond == kCondAE) {
+ // Calculate `rs1 AE rhs` as `!(rs1 B rhs)` since there's only the SLTU but no SGEU.
+ __ Xori(rd, rd, 1);
+ }
+ break;
+
+ case kCondBE:
+ case kCondA:
+ if (use_imm) {
+ // Calculate `rs1 BE imm` as `rs1 B imm + 1`.
+ // Sltiu sign-extends its 12-bit immediate operand before the comparison
+ // and thus lets us compare directly with unsigned values in the ranges
+ // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff].
+ DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check.
+ __ Sltiu(rd, rs1, imm + 1);
+ } else {
+ __ Sltu(rd, rs2, rs1);
+ }
+ if ((cond == kCondA) == use_imm) {
+ // Calculate `rs1 A imm` as `!(rs1 B imm + 1)` and calculate
+ // `rs1 BE rs2` as `!(rs2 B rs1)` since there's only the SLTU but no SGEU.
+ __ Xori(rd, rd, 1);
+ }
+ break;
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateIntLongCompareAndBranch(IfCondition cond,
+ LocationSummary* locations,
+ Riscv64Label* label) {
+ XRegister left = locations->InAt(0).AsRegister<XRegister>();
+ Location right_location = locations->InAt(1);
+ if (right_location.IsConstant()) {
+ DCHECK_EQ(CodeGenerator::GetInt64ValueOf(right_location.GetConstant()), 0);
+ switch (cond) {
+ case kCondEQ:
+ case kCondBE: // <= 0 if zero
+ __ Beqz(left, label);
+ break;
+ case kCondNE:
+ case kCondA: // > 0 if non-zero
+ __ Bnez(left, label);
+ break;
+ case kCondLT:
+ __ Bltz(left, label);
+ break;
+ case kCondGE:
+ __ Bgez(left, label);
+ break;
+ case kCondLE:
+ __ Blez(left, label);
+ break;
+ case kCondGT:
+ __ Bgtz(left, label);
+ break;
+ case kCondB: // always false
+ break;
+ case kCondAE: // always true
+ __ J(label);
+ break;
+ }
+ } else {
+ XRegister right_reg = right_location.AsRegister<XRegister>();
+ switch (cond) {
+ case kCondEQ:
+ __ Beq(left, right_reg, label);
+ break;
+ case kCondNE:
+ __ Bne(left, right_reg, label);
+ break;
+ case kCondLT:
+ __ Blt(left, right_reg, label);
+ break;
+ case kCondGE:
+ __ Bge(left, right_reg, label);
+ break;
+ case kCondLE:
+ __ Ble(left, right_reg, label);
+ break;
+ case kCondGT:
+ __ Bgt(left, right_reg, label);
+ break;
+ case kCondB:
+ __ Bltu(left, right_reg, label);
+ break;
+ case kCondAE:
+ __ Bgeu(left, right_reg, label);
+ break;
+ case kCondBE:
+ __ Bleu(left, right_reg, label);
+ break;
+ case kCondA:
+ __ Bgtu(left, right_reg, label);
+ break;
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond,
+ bool gt_bias,
+ DataType::Type type,
+ LocationSummary* locations,
+ Riscv64Label* label) {
+  // RISC-V FP compare instructions yield the following values:
+ // l<r l=r l>r Unordered
+ // FEQ l,r 0 1 0 0
+ // FLT l,r 1 0 0 0
+ // FLT r,l 0 0 1 0
+ // FLE l,r 1 1 0 0
+ // FLE r,l 0 1 1 0
+ //
+ // We can calculate the `Compare` results using the following formulas:
+ // l<r l=r l>r Unordered
+ // Compare/gt_bias -1 0 1 1 = ((FLE l,r) ^ 1) - (FLT l,r)
+ // Compare/lt_bias -1 0 1 -1 = ((FLE r,l) - 1) + (FLT r,l)
+ // These are emitted in `VisitCompare()`.
+ //
+ // This function emits a fused `Condition(Compare(., .), 0)`. If we compare the
+ // `Compare` results above with 0, we get the following values and formulas:
+ // l<r l=r l>r Unordered
+ // CondEQ/- 0 1 0 0 = (FEQ l, r)
+ // CondNE/- 1 0 1 1 = (FEQ l, r) ^ 1
+ // CondLT/gt_bias 1 0 0 0 = (FLT l,r)
+ // CondLT/lt_bias 1 0 0 1 = (FLE r,l) ^ 1
+ // CondLE/gt_bias 1 1 0 0 = (FLE l,r)
+ // CondLE/lt_bias 1 1 0 1 = (FLT r,l) ^ 1
+ // CondGT/gt_bias 0 0 1 1 = (FLE l,r) ^ 1
+ // CondGT/lt_bias 0 0 1 0 = (FLT r,l)
+ // CondGE/gt_bias 0 1 1 1 = (FLT l,r) ^ 1
+ // CondGE/lt_bias 0 1 1 0 = (FLE r,l)
+ // (CondEQ/CondNE comparison with zero yields the same result with gt_bias and lt_bias.)
+ //
+ // If the condition is not materialized, the `^ 1` is not emitted,
+ // instead the condition is reversed by emitting BEQZ instead of BNEZ.
+
+ FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
+
+ DCHECK_EQ(label != nullptr, locations->Out().IsInvalid());
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister rd =
+ (label != nullptr) ? srs.AllocateXRegister() : locations->Out().AsRegister<XRegister>();
+ bool reverse_condition = false;
+
+ switch (cond) {
+ case kCondEQ:
+ FEq(rd, rs1, rs2, type);
+ break;
+ case kCondNE:
+ FEq(rd, rs1, rs2, type);
+ reverse_condition = true;
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ FLt(rd, rs1, rs2, type);
+ } else {
+ FLe(rd, rs2, rs1, type);
+ reverse_condition = true;
+ }
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ FLe(rd, rs1, rs2, type);
+ } else {
+ FLt(rd, rs2, rs1, type);
+ reverse_condition = true;
+ }
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ FLe(rd, rs1, rs2, type);
+ reverse_condition = true;
+ } else {
+ FLt(rd, rs2, rs1, type);
+ }
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ FLt(rd, rs1, rs2, type);
+ reverse_condition = true;
+ } else {
+ FLe(rd, rs2, rs1, type);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected floating-point condition " << cond;
+ UNREACHABLE();
+ }
+
+ if (label != nullptr) {
+ if (reverse_condition) {
+ __ Beqz(rd, label);
+ } else {
+ __ Bnez(rd, label);
+ }
+ } else {
+ if (reverse_condition) {
+ __ Xori(rd, rd, 1);
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, offset, /*index=*/ Location::NoLocation(), temp, needs_null_check);
+}
+
+void CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, data_offset, index, temp, needs_null_check);
+}
+
+void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ // For now, use the same approach as for GC roots plus unpoison the reference if needed.
+ // TODO(riscv64): Implement checking if the holder is black.
+ UNUSED(temp);
+
+ XRegister reg = ref.AsRegister<XRegister>();
+ if (index.IsValid()) {
+ DCHECK(instruction->IsArrayGet());
+ DCHECK(!needs_null_check);
+ DCHECK(index.IsRegister());
+ // /* HeapReference<Object> */ ref = *(obj + index * element_size + offset)
+ DataType::Type type = DataType::Type::kReference;
+ DCHECK_EQ(type, instruction->GetType());
+ instruction_visitor_.ShNAdd(reg, index.AsRegister<XRegister>(), obj, type);
+ __ Loadwu(reg, reg, offset);
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ Loadwu(reg, obj, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ MaybeUnpoisonHeapReference(reg);
+
+ // Slow path marking the reference.
+ XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway.
+ SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(
+ instruction, ref, Location::RegisterLocation(tmp));
+ AddSlowPath(slow_path);
+
+ const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(ref);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Loadd(tmp, TR, entry_point_offset);
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ UNUSED(instruction);
+ UNUSED(out);
+ UNUSED(ref);
+ UNUSED(obj);
+ UNUSED(offset);
+ UNUSED(index);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void CodeGeneratorRISCV64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (gUseReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ UnpoisonHeapReference(out.AsRegister<XRegister>());
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(gUseReadBarrier);
+
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCodeRISCV64* slow_path =
+ new (GetScopedAllocator()) ReadBarrierForRootSlowPathRISCV64(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ __ J(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction,
+ HBasicBlock* successor) {
+ if (successor->IsExitBlock()) {
+ DCHECK(instruction->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
+
+ HBasicBlock* block = instruction->GetBlock();
+ HInstruction* previous = instruction->GetPrevious();
+ HLoopInformation* info = block->GetLoopInformation();
+
+ if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ codegen_->MaybeIncrementHotness(/*is_frame_entry=*/ false);
+ GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+ return; // `GenerateSuspendCheck()` emitted the jump.
+ }
+ if (block->IsEntryBlock() && previous != nullptr && previous->IsSuspendCheck()) {
+ GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+ }
+ if (!codegen_->GoesToNextBlock(block, successor)) {
+ __ J(codegen_->GetLabelOf(successor));
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenPackedSwitchWithCompares(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block) {
+ // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0,
+ // `adjusted` is the original `value` register and we must not clobber it. Otherwise,
+ // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check.
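+  // For example (illustrative): with `num_entries == 3` the loop below emits
+  //   BEQZ adjusted, successors[0]; ADDI temp, adjusted, -2; BLTZ temp, successors[1]
+  // and the trailing unconditional jump handles successors[2].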
+
+ // Create a set of compare/jumps.
+ ArrayRef<HBasicBlock* const> successors(switch_block->GetSuccessors());
+ uint32_t index = 0;
+ for (; num_entries - index >= 2u; index += 2u) {
+ // Jump to `successors[index]` if `value == lower_bound + index`.
+ // Note that `adjusted` holds `value - lower_bound - index`.
+ __ Beqz(adjusted, codegen_->GetLabelOf(successors[index]));
+ if (num_entries - index == 2u) {
+ break; // The last entry shall match, so the branch shall be unconditional.
+ }
+ // Jump to `successors[index + 1]` if `value == lower_bound + index + 1`.
+ // Modify `adjusted` to hold `value - lower_bound - index - 2` for this comparison.
+ __ Addi(temp, adjusted, -2);
+ adjusted = temp;
+ __ Bltz(adjusted, codegen_->GetLabelOf(successors[index + 1]));
+ }
+ // For the last entry, unconditionally jump to `successors[num_entries - 1]`.
+ __ J(codegen_->GetLabelOf(successors[num_entries - 1u]));
+}
+
+void InstructionCodeGeneratorRISCV64::GenTableBasedPackedSwitch(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block) {
+ // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0,
+ // `adjusted` is the original `value` register and we must not clobber it. Otherwise,
+ // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check.
+
+ // Create a jump table.
+ ArenaVector<Riscv64Label*> labels(num_entries,
+ __ GetAllocator()->Adapter(kArenaAllocSwitchTable));
+ const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+ for (uint32_t i = 0; i < num_entries; i++) {
+ labels[i] = codegen_->GetLabelOf(successors[i]);
+ }
+ JumpTable* table = __ CreateJumpTable(std::move(labels));
+
+ // Load the address of the jump table.
+ // Note: The `LoadLabelAddress()` emits AUIPC+ADD. It is possible to avoid the ADD and
+ // instead embed that offset in the LW below as well as all jump table entries but
+ // that would need some invasive changes in the jump table handling in the assembler.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister table_base = srs.AllocateXRegister();
+ __ LoadLabelAddress(table_base, table->GetLabel());
+
+ // Load the PC difference from the jump table.
+ // TODO(riscv64): Use SH2ADD from the Zba extension.
+ __ Slli(temp, adjusted, 2);
+ __ Add(temp, temp, table_base);
+ __ Lw(temp, temp, 0);
+
+ // Compute the absolute target address by adding the table start address
+ // (the table contains offsets to targets relative to its start).
+ __ Add(temp, temp, table_base);
+ // And jump.
+ __ Jr(temp);
+}
+
+int32_t InstructionCodeGeneratorRISCV64::VecAddress(LocationSummary* locations,
+ size_t size,
+ /*out*/ XRegister* adjusted_base) {
+ UNUSED(locations);
+ UNUSED(size);
+ UNUSED(adjusted_base);
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorRISCV64::GenConditionalMove(HSelect* select) {
+ UNUSED(select);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
+ DCHECK_EQ(instruction->InputCount(), 2u);
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DataType::Type type = instruction->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ HInstruction* right = instruction->InputAt(1);
+ bool can_use_imm = false;
+ if (instruction->IsMin() || instruction->IsMax()) {
+        can_use_imm = IsZeroBitPattern(right);
+ } else if (right->IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
+ can_use_imm = IsInt<12>(instruction->IsSub() ? -imm : imm);
+ }
+ if (can_use_imm) {
+ locations->SetInAt(1, Location::ConstantLocation(right));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (instruction->IsMin() || instruction->IsMax()) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
+ DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ XRegister rd = locations->Out().AsRegister<XRegister>();
+ XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
+ Location rs2_location = locations->InAt(1);
+
+ bool use_imm = rs2_location.IsConstant();
+ XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>();
+ int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0;
+
+ if (instruction->IsAnd()) {
+ if (use_imm) {
+ __ Andi(rd, rs1, imm);
+ } else {
+ __ And(rd, rs1, rs2);
+ }
+ } else if (instruction->IsOr()) {
+ if (use_imm) {
+ __ Ori(rd, rs1, imm);
+ } else {
+ __ Or(rd, rs1, rs2);
+ }
+ } else if (instruction->IsXor()) {
+ if (use_imm) {
+ __ Xori(rd, rs1, imm);
+ } else {
+ __ Xor(rd, rs1, rs2);
+ }
+ } else if (instruction->IsAdd() || instruction->IsSub()) {
+ if (type == DataType::Type::kInt32) {
+ if (use_imm) {
+ __ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm);
+ } else if (instruction->IsAdd()) {
+ __ Addw(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsSub());
+ __ Subw(rd, rs1, rs2);
+ }
+ } else {
+ if (use_imm) {
+ __ Addi(rd, rs1, instruction->IsSub() ? -imm : imm);
+ } else if (instruction->IsAdd()) {
+ __ Add(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsSub());
+ __ Sub(rd, rs1, rs2);
+ }
+ }
+ } else if (instruction->IsMin()) {
+ DCHECK_IMPLIES(use_imm, imm == 0);
+ __ Min(rd, rs1, use_imm ? Zero : rs2);
+ } else {
+ DCHECK(instruction->IsMax());
+ DCHECK_IMPLIES(use_imm, imm == 0);
+ __ Max(rd, rs1, use_imm ? Zero : rs2);
+ }
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FRegister rd = locations->Out().AsFpuRegister<FRegister>();
+ FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
+ if (instruction->IsAdd()) {
+ FAdd(rd, rs1, rs2, type);
+ } else if (instruction->IsSub()) {
+ FSub(rd, rs1, rs2, type);
+ } else {
+ DCHECK(instruction->IsMin() || instruction->IsMax());
+        // If one of the operands is NaN and the other is not, the riscv64 FMIN/FMAX instructions
+        // return the other (non-NaN) operand, while we want to return the NaN operand.
+        DCHECK_NE(rd, rs1);  // Requested `Location::kOutputOverlap`.
+        DCHECK_NE(rd, rs2);  // Requested `Location::kOutputOverlap`.
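+        // FCLASS sets exactly one class bit; bit 8 (signaling NaN) and bit 9 (quiet NaN) are the
+        // two highest classes, so a NaN yields a result of at least 0x100 (presumably the value
+        // of `kFClassNaNMinValue`) and the BGEU checks below rely on this ordering.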
+        ScratchRegisterScope srs(GetAssembler());
+        XRegister tmp = srs.AllocateXRegister();
+        XRegister tmp2 = srs.AllocateXRegister();
+        Riscv64Label done;
+        // Return `rs1` if it's NaN.
+        FClass(tmp, rs1, type);
+        __ Li(tmp2, kFClassNaNMinValue);
+        FMv(rd, rs1, type);
+        __ Bgeu(tmp, tmp2, &done);
+        // Return `rs2` if it's NaN.
+        FClass(tmp, rs2, type);
+        FMv(rd, rs2, type);
+        __ Bgeu(tmp, tmp2, &done);
+        // Calculate Min/Max for non-NaN arguments.
+        if (instruction->IsMin()) {
+          FMin(rd, rs1, rs2, type);
+        } else {
+          FMax(rd, rs1, rs2, type);
+        }
+        __ Bind(&done);
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected binary operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::HandleCondition(HCondition* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->InputAt(0)->GetType()) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ break;
+
+ default: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ HInstruction* rhs = instruction->InputAt(1);
+ bool use_imm = false;
+ if (rhs->IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(rhs->AsConstant());
+ if (instruction->IsEmittedAtUseSite()) {
+ // For `HIf`, materialize all non-zero constants with an `HParallelMove`.
+ // Note: For certain constants and conditions, the code could be improved.
+ // For example, 2048 takes two instructions to materialize but the negative
+ // -2048 could be embedded in ADDI for EQ/NE comparison.
+ use_imm = (imm == 0);
+ } else {
+ // Constants that cannot be embedded in an instruction's 12-bit immediate shall be
+ // materialized with an `HParallelMove`. This simplifies the code and avoids cases
+ // with arithmetic overflow. Adjust the `imm` if needed for a particular instruction.
+ switch (instruction->GetCondition()) {
+ case kCondEQ:
+ case kCondNE:
+ imm = -imm; // ADDI with negative immediate (there is no SUBI).
+ break;
+ case kCondLE:
+ case kCondGT:
+ case kCondBE:
+ case kCondA:
+ imm += 1; // SLTI/SLTIU with adjusted immediate (there is no SLEI/SLEIU).
+ break;
+ default:
+ break;
+ }
+ use_imm = IsInt<12>(imm);
+ }
+ }
+ if (use_imm) {
+ locations->SetInAt(1, Location::ConstantLocation(rhs));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ break;
+ }
+ }
+ if (!instruction->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleCondition(HCondition* instruction) {
+ if (instruction->IsEmittedAtUseSite()) {
+ return;
+ }
+
+ DataType::Type type = instruction->InputAt(0)->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+ switch (type) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateFpCondition(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
+ return;
+ default:
+ // Integral types and reference equality.
+ GenerateIntLongCondition(instruction->GetCondition(), locations);
+ return;
+ }
+}
+
+void LocationsBuilderRISCV64::HandleShift(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsShl() ||
+ instruction->IsShr() ||
+ instruction->IsUShr() ||
+ instruction->IsRor());
+
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DataType::Type type = instruction->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected shift type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleShift(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsShl() ||
+ instruction->IsShr() ||
+ instruction->IsUShr() ||
+ instruction->IsRor());
+ LocationSummary* locations = instruction->GetLocations();
+ DataType::Type type = instruction->GetType();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ XRegister rd = locations->Out().AsRegister<XRegister>();
+ XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
+ Location rs2_location = locations->InAt(1);
+
+ if (rs2_location.IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant());
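+        // The shift distance is implicitly masked to the operand width (31 for int, 63 for
+        // long), matching Java shift semantics.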
+ uint32_t shamt =
+ imm & (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
+
+ if (shamt == 0) {
+ if (rd != rs1) {
+ __ Mv(rd, rs1);
+ }
+ } else if (type == DataType::Type::kInt32) {
+ if (instruction->IsShl()) {
+ __ Slliw(rd, rs1, shamt);
+ } else if (instruction->IsShr()) {
+ __ Sraiw(rd, rs1, shamt);
+ } else if (instruction->IsUShr()) {
+ __ Srliw(rd, rs1, shamt);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Roriw(rd, rs1, shamt);
+ }
+ } else {
+ if (instruction->IsShl()) {
+ __ Slli(rd, rs1, shamt);
+ } else if (instruction->IsShr()) {
+ __ Srai(rd, rs1, shamt);
+ } else if (instruction->IsUShr()) {
+ __ Srli(rd, rs1, shamt);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Rori(rd, rs1, shamt);
+ }
+ }
+ } else {
+ XRegister rs2 = rs2_location.AsRegister<XRegister>();
+ if (type == DataType::Type::kInt32) {
+ if (instruction->IsShl()) {
+ __ Sllw(rd, rs1, rs2);
+ } else if (instruction->IsShr()) {
+ __ Sraw(rd, rs1, rs2);
+ } else if (instruction->IsUShr()) {
+ __ Srlw(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Rorw(rd, rs1, rs2);
+ }
+ } else {
+ if (instruction->IsShl()) {
+ __ Sll(rd, rs1, rs2);
+ } else if (instruction->IsShr()) {
+ __ Sra(rd, rs1, rs2);
+ } else if (instruction->IsUShr()) {
+ __ Srl(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Ror(rd, rs1, rs2);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected shift operation type " << type;
+ }
+}
+
+void CodeGeneratorRISCV64::MarkGCCard(XRegister object,
+ XRegister value,
+ bool value_can_be_null) {
+ Riscv64Label done;
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister card = srs.AllocateXRegister();
+ XRegister temp = srs.AllocateXRegister();
+ if (value_can_be_null) {
+ __ Beqz(value, &done);
+ }
+ // Load the address of the card table into `card`.
+ __ Loadd(card, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value());
+
+ // Calculate the address of the card corresponding to `object`.
+ __ Srli(temp, object, gc::accounting::CardTable::kCardShift);
+ __ Add(temp, card, temp);
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the SB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
+ __ Storeb(card, temp, 0);
+ if (value_can_be_null) {
+ __ Bind(&done);
+ }
+}
+
+void LocationsBuilderRISCV64::HandleFieldSet(HInstruction* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, ValueLocationForStore(instruction->InputAt(1)));
+}
+
+void InstructionCodeGeneratorRISCV64::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
+ DataType::Type type = field_info.GetFieldType();
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister obj = locations->InAt(0).AsRegister<XRegister>();
+ Location value = locations->InAt(1);
+ DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant()));
+ bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ bool is_predicated =
+ instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
+
+ Riscv64Label pred_is_null;
+ if (is_predicated) {
+ __ Beqz(obj, &pred_is_null);
+ }
+
+ if (is_volatile) {
+ if (DataType::Size(type) >= 4u) {
+ // Use AMOSWAP for 32-bit and 64-bit data types.
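+      // An AMOSWAP with the release bit set orders all prior accesses before the store and
+      // discards the old memory value by writing it to `Zero`. AMO instructions operate on
+      // integer registers, so floating-point and poisoned reference values are first moved
+      // to a scratch X register.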
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister swap_src = kNoXRegister;
+ if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) {
+ swap_src = srs.AllocateXRegister();
+ __ Mv(swap_src, value.AsRegister<XRegister>());
+ codegen_->PoisonHeapReference(swap_src);
+ } else if (type == DataType::Type::kFloat64 && !value.IsConstant()) {
+ swap_src = srs.AllocateXRegister();
+ __ FMvXD(swap_src, value.AsFpuRegister<FRegister>());
+ } else if (type == DataType::Type::kFloat32 && !value.IsConstant()) {
+ swap_src = srs.AllocateXRegister();
+ __ FMvXW(swap_src, value.AsFpuRegister<FRegister>());
+ } else {
+ swap_src = InputXRegisterOrZero(value);
+ }
+ XRegister addr = srs.AllocateXRegister();
+ __ AddConst64(addr, obj, offset);
+ if (DataType::Is64BitType(type)) {
+ __ AmoSwapD(Zero, swap_src, addr, AqRl::kRelease);
+ } else {
+ __ AmoSwapW(Zero, swap_src, addr, AqRl::kRelease);
+ }
+ } else {
+ // Use fences for smaller data types.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ Store(value, obj, offset, type);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ Store(value, obj, offset, type);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ codegen_->MarkGCCard(
+ obj,
+ value.AsRegister<XRegister>(),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
+ }
+
+ if (is_predicated) {
+ __ Bind(&pred_is_null);
+ }
+}
+
+void LocationsBuilderRISCV64::HandleFieldGet(HInstruction* instruction) {
+ DCHECK(instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsPredicatedInstanceFieldGet());
+
+ bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
+
+ bool object_field_get_with_read_barrier =
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction,
+ object_field_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
+
+ // Input for object receiver.
+ locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
+
+ if (DataType::IsFloatingPointType(instruction->GetType())) {
+ if (is_predicated) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ } else {
+ locations->SetOut(Location::RequiresFpuRegister());
+ }
+ } else {
+ if (is_predicated) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ } else {
+ // The output overlaps for an object field get when read barriers
+ // are enabled: we do not want the load to overwrite the object's
+ // location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ }
+
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsPredicatedInstanceFieldGet());
+ DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+ DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location dst_loc = locations->Out();
+ bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Object> */ dst = *(obj + offset)
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ dst_loc,
+ obj,
+ offset,
+ temp_loc,
+ /* needs_null_check= */ true);
+ } else {
+ Load(dst_loc, obj, offset, type);
+ }
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+
+ if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) {
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitAbove(HAbove* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAbove(HAbove* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
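+      // Branchless abs: `tmp` is 0 for non-negative inputs and -1 for negative inputs, so
+      // `(in ^ tmp) - tmp` negates negative values and leaves non-negative ones unchanged.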
+ __ Sraiw(tmp, in, 31);
+ __ Xor(out, in, tmp);
+ __ Subw(out, out, tmp);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Srai(tmp, in, 63);
+ __ Xor(out, in, tmp);
+ __ Sub(out, out, tmp);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ FAbs(locations->Out().AsFpuRegister<FRegister>(),
+ locations->InAt(0).AsFpuRegister<FRegister>(),
+ abs->GetResultType());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitAdd(HAdd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAdd(HAdd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitAnd(HAnd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAnd(HAnd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitArrayGet(HArrayGet* instruction) {
+ DataType::Type type = instruction->GetType();
+ bool object_array_get_with_read_barrier = gUseReadBarrier && (type == DataType::Type::kReference);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction,
+ object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (DataType::IsFloatingPointType(type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ }
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitArrayGet(HArrayGet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location out_loc = locations->Out();
+ Location index = locations->InAt(1);
+ uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ DataType::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
+
+ Riscv64Label string_char_at_done;
+ if (maybe_compressed_char_at) {
+ DCHECK_EQ(type, DataType::Type::kUint16);
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ Riscv64Label uncompressed_load;
+ {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadw(tmp, obj, count_offset);
+ __ Andi(tmp, tmp, 0x1);
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ __ Bnez(tmp, &uncompressed_load);
+ }
+ XRegister out = out_loc.AsRegister<XRegister>();
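+    // A compressed string stores one byte per character, so the index is used directly as
+    // the byte offset from `data_offset`.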
+ if (index.IsConstant()) {
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ __ Loadbu(out, obj, data_offset + const_index);
+ } else {
+ __ Add(out, obj, index.AsRegister<XRegister>());
+ __ Loadbu(out, out, data_offset);
+ }
+ __ J(&string_char_at_done);
+ __ Bind(&uncompressed_load);
+ }
+
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ // Note that a potential implicit null check could be handled in these
+ // `CodeGeneratorRISCV64::Generate{Array,Field}LoadWithBakerReadBarrier()` calls
+ // but we currently do not support implicit null checks on `HArrayGet`.
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ Location temp = locations->GetTemp(0);
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ static constexpr size_t shift = DataType::SizeShift(DataType::Type::kReference);
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << shift) + data_offset;
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ offset,
+ temp,
+ /* needs_null_check= */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check= */ false);
+ }
+ } else if (index.IsConstant()) {
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ int32_t offset = data_offset + (const_index << DataType::SizeShift(type));
+ Load(out_loc, obj, offset, type);
+ if (type == DataType::Type::kReference) {
+ DCHECK(!(gUseReadBarrier && kUseBakerReadBarrier));
+ // If read barriers are enabled, emit read barriers other than Baker's using
+ // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ }
+ } else {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ ShNAdd(tmp, index.AsRegister<XRegister>(), obj, type);
+ Load(out_loc, tmp, data_offset, type);
+ if (type == DataType::Type::kReference) {
+ DCHECK(!(gUseReadBarrier && kUseBakerReadBarrier));
+ // If read barriers are enabled, emit read barriers other than Baker's using
+ // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+
+ if (maybe_compressed_char_at) {
+ __ Bind(&string_char_at_done);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitArrayLength(HArrayLength* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitArrayLength(HArrayLength* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+ XRegister obj = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ __ Loadwu(out, obj, offset); // Unsigned for string length; does not matter for other arrays.
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ Srli(out, out, 1u);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitArraySet(HArraySet* instruction) {
+ bool needs_type_check = instruction->NeedsTypeCheck();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction,
+ needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetInAt(2, ValueLocationForStore(instruction->GetValue()));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitArraySet(HArraySet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister array = locations->InAt(0).AsRegister<XRegister>();
+ Location index = locations->InAt(1);
+ Location value = locations->InAt(2);
+ DataType::Type value_type = instruction->GetComponentType();
+ bool needs_type_check = instruction->NeedsTypeCheck();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ size_t data_offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
+ SlowPathCodeRISCV64* slow_path = nullptr;
+
+ if (needs_write_barrier) {
+ DCHECK_EQ(value_type, DataType::Type::kReference);
+ DCHECK(!value.IsConstant());
+ Riscv64Label do_store;
+
+ bool can_value_be_null = instruction->GetValueCanBeNull();
+ if (can_value_be_null) {
+ __ Beqz(value.AsRegister<XRegister>(), &do_store);
+ }
+
+ if (needs_type_check) {
+ slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister temp1 = srs.AllocateXRegister();
+ XRegister temp2 = srs.AllocateXRegister();
+
+ // Note that when read barriers are enabled, the type checks are performed
+ // without read barriers. This is fine, even in the case where a class object
+ // is in the from-space after the flip, as a comparison involving such a type
+ // would not produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow path.
+
+ // /* HeapReference<Class> */ temp1 = array->klass_
+ __ Loadwu(temp1, array, class_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ codegen_->MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Loadwu(temp2, temp1, component_offset);
+ // /* HeapReference<Class> */ temp1 = value->klass_
+ __ Loadwu(temp1, value.AsRegister<XRegister>(), class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ Riscv64Label do_put;
+ __ Beq(temp1, temp2, &do_put);
+ // If heap poisoning is enabled, the `temp2` reference has
+ // not been unpoisoned yet; unpoison it now.
+ codegen_->MaybeUnpoisonHeapReference(temp2);
+
+ // /* HeapReference<Class> */ temp1 = temp2->super_class_
+ __ Loadwu(temp1, temp2, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ Bnez(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ Bne(temp1, temp2, slow_path->GetEntryLabel());
+ }
+ }
+
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+      codegen_->MarkGCCard(array, value.AsRegister<XRegister>(), /* value_can_be_null= */ false);
+ }
+
+ if (can_value_be_null) {
+ __ Bind(&do_store);
+ }
+ }
+
+ if (index.IsConstant()) {
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ int32_t offset = data_offset + (const_index << DataType::SizeShift(value_type));
+ Store(value, array, offset, value_type);
+ } else {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ ShNAdd(tmp, index.AsRegister<XRegister>(), array, value_type);
+ Store(value, tmp, data_offset, value_type);
+ }
+  // There must be no instructions between the `Store()` and the `MaybeRecordImplicitNullCheck()`.
+  // We can skip recording it here if the type check above runs unconditionally, since it has
+  // already recorded the implicit null check on the array.
+ DCHECK_IMPLIES(needs_type_check, needs_write_barrier);
+ if (!(needs_type_check && !instruction->GetValueCanBeNull())) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitBelow(HBelow* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBelow(HBelow* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitBooleanNot(HBooleanNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBooleanNot(HBooleanNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ __ Xori(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>(), 1);
+}
+
+void LocationsBuilderRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) {
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
+
+ HInstruction* index = instruction->InputAt(0);
+ HInstruction* length = instruction->InputAt(1);
+
+ bool const_index = false;
+ bool const_length = false;
+
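+  // Keep the index and/or length as constants when the generated check does not need them
+  // in registers: a constant length of 0 or 1 and a non-positive constant index have
+  // dedicated code paths in the code generator below.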
+ if (length->IsConstant()) {
+ if (index->IsConstant()) {
+ const_index = true;
+ const_length = true;
+ } else {
+ int32_t length_value = length->AsIntConstant()->GetValue();
+ if (length_value == 0 || length_value == 1) {
+ const_length = true;
+ }
+ }
+ } else if (index->IsConstant()) {
+ int32_t index_value = index->AsIntConstant()->GetValue();
+ if (index_value <= 0) {
+ const_index = true;
+ }
+ }
+
+ locations->SetInAt(
+ 0,
+ const_index ? Location::ConstantLocation(index) : Location::RequiresRegister());
+ locations->SetInAt(
+ 1,
+ const_length ? Location::ConstantLocation(length) : Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location index_loc = locations->InAt(0);
+ Location length_loc = locations->InAt(1);
+
+ if (length_loc.IsConstant()) {
+ int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue();
+ if (index_loc.IsConstant()) {
+ int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+ if (index < 0 || index >= length) {
+ BoundsCheckSlowPathRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ // Nothing to be done.
+ }
+ return;
+ }
+
+ BoundsCheckSlowPathRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ XRegister index = index_loc.AsRegister<XRegister>();
+ if (length == 0) {
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ DCHECK_EQ(length, 1);
+ __ Bnez(index, slow_path->GetEntryLabel());
+ }
+ } else {
+ XRegister length = length_loc.AsRegister<XRegister>();
+ BoundsCheckSlowPathRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (index_loc.IsConstant()) {
+ int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+ if (index < 0) {
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ DCHECK_EQ(index, 0);
+ __ Blez(length, slow_path->GetEntryLabel());
+ }
+ } else {
+ XRegister index = index_loc.AsRegister<XRegister>();
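+      // Treat the index as unsigned: a negative index becomes a large unsigned value and is
+      // thus also caught by the single `index >= length` comparison below.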
+ __ Bgeu(index, length, slow_path->GetEntryLabel());
+ }
+ }
+}
+
+void LocationsBuilderRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ LOG(FATAL) << "Unreachable";
+}
+
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (gUseReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class and maybe a temp for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    return 3;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
+void LocationsBuilderRISCV64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Location::NoLocation()
+ : locations->InAt(1);
+ Location temp_loc = locations->GetTemp(0);
+ XRegister temp = temp_loc.AsRegister<XRegister>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 3u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ Riscv64Label done;
+
+ bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(slow_path);
+
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Jump to slow path for throwing the exception or doing a
+ // more involved array check.
+ __ Bne(temp, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+ // Otherwise, compare the classes.
+ __ Bne(temp, cls.AsRegister<XRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Walk over the class hierarchy to find a match.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Beq(temp, cls.AsRegister<XRegister>(), &done);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception. Otherwise, jump to the beginning of the loop.
+ __ Bnez(temp, &loop);
+ __ J(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Do an exact check.
+ __ Beq(temp, cls.AsRegister<XRegister>(), &done);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ component_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the component type is null, jump to the slow path to throw the exception.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array; further check that this component
+      // type is not a primitive type.
+ __ Loadhu(temp, temp, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ // We always go into the type check slow path for the unresolved check case.
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+ // calling InvokeRuntime directly), as it would require to
+ // assign fixed registers for the inputs of this HInstanceOf
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ J(slow_path->GetEntryLabel());
+ break;
+
+ case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path. We cannot get
+      // false positives by doing this. False negatives are handled by the slow path.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // /* HeapReference<Class> */ temp = temp->iftable_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ XRegister temp2 = maybe_temp2_loc.AsRegister<XRegister>();
+ XRegister temp3 = maybe_temp3_loc.AsRegister<XRegister>();
+ // Iftable is never null.
+ __ Loadw(temp2, temp, array_length_offset);
+ // Loop through the iftable and check if any class matches.
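+      // Each IfTable entry holds two references (the interface class and its method array),
+      // hence the stride of `2 * kHeapReferenceSize` and the count decrement by 2 below.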
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Beqz(temp2, slow_path->GetEntryLabel());
+ __ Lwu(temp3, temp, object_array_data_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp3);
+ // Go to next interface.
+ __ Addi(temp, temp, 2 * kHeapReferenceSize);
+ __ Addi(temp2, temp2, -2);
+ // Compare the classes and continue the loop if they do not match.
+ __ Bne(temp3, cls.AsRegister<XRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ break;
+ }
+ }
+
+ __ Bind(&done);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderRISCV64::VisitClassTableGet(HClassTableGet* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitClassTableGet(HClassTableGet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
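+  // Load the requested ArtMethod* either from the class's embedded vtable or, for the IMT
+  // case, indirectly through the class's ImTable pointer.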
+ if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+ MemberOffset method_offset =
+ mirror::Class::EmbeddedVTableEntryOffset(instruction->GetIndex(), kRiscv64PointerSize);
+ __ Loadd(out, in, method_offset.SizeValue());
+ } else {
+ uint32_t method_offset = dchecked_integral_cast<uint32_t>(
+ ImTable::OffsetOfElement(instruction->GetIndex(), kRiscv64PointerSize));
+ __ Loadd(out, in, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value());
+ __ Loadd(out, out, method_offset);
+ }
+}
+
+static int32_t GetExceptionTlsOffset() {
+ return Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value();
+}
+
+void LocationsBuilderRISCV64::VisitClearException(HClearException* instruction) {
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitClearException(
+ [[maybe_unused]] HClearException* instruction) {
+ __ Stored(Zero, TR, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderRISCV64::VisitClinitCheck(HClinitCheck* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (instruction->HasUses()) {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitClinitCheck(HClinitCheck* instruction) {
+ // We assume the class is not null.
+ SlowPathCodeRISCV64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(
+ instruction->GetLoadClass(), instruction);
+ codegen_->AddSlowPath(slow_path);
+ GenerateClassInitializationCheck(slow_path,
+ instruction->GetLocations()->InAt(0).AsRegister<XRegister>());
+}
+
+void LocationsBuilderRISCV64::VisitCompare(HCompare* instruction) {
+ DataType::Type in_type = instruction->InputAt(0)->GetType();
+
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ switch (in_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type for compare operation " << in_type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCompare(HCompare* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister result = locations->Out().AsRegister<XRegister>();
+ DataType::Type in_type = instruction->InputAt(0)->GetType();
+
+ // 0 if: left == right
+ // 1 if: left > right
+ // -1 if: left < right
+ switch (in_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ XRegister left = locations->InAt(0).AsRegister<XRegister>();
+ XRegister right = InputXRegisterOrZero(locations->InAt(1));
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
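+      // result = (right < left) - (left < right), i.e. the sign of the comparison, computed
+      // without a subtraction that could overflow.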
+ __ Slt(tmp, left, right);
+ __ Slt(result, right, left);
+ __ Sub(result, result, tmp);
+ break;
+ }
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FRegister left = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister right = locations->InAt(1).AsFpuRegister<FRegister>();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
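+      // FLE and FLT both yield 0 when either input is a NaN, so the gt-bias formula below
+      // evaluates to 1 and the lt-bias formula to -1 for NaN inputs.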
+ if (instruction->IsGtBias()) {
+ // ((FLE l,r) ^ 1) - (FLT l,r); see `GenerateFpCondition()`.
+ FLe(tmp, left, right, in_type);
+ FLt(result, left, right, in_type);
+ __ Xori(tmp, tmp, 1);
+ __ Sub(result, tmp, result);
+ } else {
+ // ((FLE r,l) - 1) + (FLT r,l); see `GenerateFpCondition()`.
+ FLe(tmp, right, left, in_type);
+ FLt(result, right, left, in_type);
+ __ Addi(tmp, tmp, -1);
+ __ Add(result, result, tmp);
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unimplemented compare type " << in_type;
+ }
+}
+
+void LocationsBuilderRISCV64::VisitConstructorFence(HConstructorFence* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitConstructorFence(
+ [[maybe_unused]] HConstructorFence* instruction) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
+void LocationsBuilderRISCV64::VisitCurrentMethod(HCurrentMethod* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::RegisterLocation(kArtMethodRegister));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCurrentMethod(
+ [[maybe_unused]] HCurrentMethod* instruction) {
+ // Nothing to do, the method is already at its location.
+}
+
+void LocationsBuilderRISCV64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitShouldDeoptimizeFlag(
+ HShouldDeoptimizeFlag* instruction) {
+ __ Loadw(instruction->GetLocations()->Out().AsRegister<XRegister>(),
+ SP,
+ codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+}
+
+void LocationsBuilderRISCV64::VisitDeoptimize(HDeoptimize* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDeoptimize(HDeoptimize* instruction) {
+ SlowPathCodeRISCV64* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathRISCV64>(instruction);
+ GenerateTestAndBranch(instruction,
+ /* condition_input_index= */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target= */ nullptr);
+}
+
+void LocationsBuilderRISCV64::VisitDiv(HDiv* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected div type " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDiv(HDiv* instruction) {
+ DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateDivRemIntegral(instruction);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+ FDiv(dst, lhs, rhs, type);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected div type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
+ locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ Location value = instruction->GetLocations()->InAt(0);
+
+ DataType::Type type = instruction->GetType();
+
+ if (!DataType::IsIntegralType(type)) {
+ LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+ UNREACHABLE();
+ }
+
+ if (value.IsConstant()) {
+ int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
+ if (divisor == 0) {
+ __ J(slow_path->GetEntryLabel());
+ } else {
+      // A division by a non-zero constant is valid. We don't need to perform
+ // any check, so simply fall through.
+ }
+ } else {
+ __ Beqz(value.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitDoubleConstant(HDoubleConstant* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDoubleConstant(
+ [[maybe_unused]] HDoubleConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitEqual(HEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitEqual(HEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitExit(HExit* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitExit([[maybe_unused]] HExit* instruction) {}
+
+void LocationsBuilderRISCV64::VisitFloatConstant(HFloatConstant* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitFloatConstant(
+ [[maybe_unused]] HFloatConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitGoto(HGoto* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitGoto(HGoto* instruction) {
+ HandleGoto(instruction, instruction->GetSuccessor());
+}
+
+void LocationsBuilderRISCV64::VisitGreaterThan(HGreaterThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitGreaterThan(HGreaterThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitIf(HIf* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIf(HIf* instruction) {
+ HBasicBlock* true_successor = instruction->IfTrueSuccessor();
+ HBasicBlock* false_successor = instruction->IfFalseSuccessor();
+ Riscv64Label* true_target = codegen_->GoesToNextBlock(instruction->GetBlock(), true_successor)
+ ? nullptr
+ : codegen_->GetLabelOf(true_successor);
+ Riscv64Label* false_target = codegen_->GoesToNextBlock(instruction->GetBlock(), false_successor)
+ ? nullptr
+ : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(instruction, /* condition_input_index= */ 0, true_target, false_target);
+}
+
+void LocationsBuilderRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
+}
+
+void LocationsBuilderRISCV64::VisitPredicatedInstanceFieldGet(
+ HPredicatedInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitPredicatedInstanceFieldGet(
+ HPredicatedInstanceFieldGet* instruction) {
+ Riscv64Label finish;
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister target = locations->InAt(1).AsRegister<XRegister>();
+ __ Beqz(target, &finish);
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+ __ Bind(&finish);
+}
+
+void LocationsBuilderRISCV64::VisitInstanceOf(HInstanceOf* instruction) {
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck: {
+ bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
+ break;
+ }
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
+ call_kind = LocationSummary::kCallOnSlowPath;
+ break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
+ }
+
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+  // The output overlaps the inputs.
+ // Note that TypeCheckSlowPathRISCV64 uses this register too.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Location::NoLocation()
+ : locations->InAt(1);
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ Riscv64Label done;
+ SlowPathCodeRISCV64* slow_path = nullptr;
+
+ // Return 0 if `obj` is null.
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Mv(out, Zero);
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // Classes must be equal for the instanceof to succeed.
+ __ Xor(out, out, cls.AsRegister<XRegister>());
+ __ Seqz(out, out);
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(
+ instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqz(out, &done);
+ __ Bne(out, cls.AsRegister<XRegister>(), &loop);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // Walk over the class hierarchy to find a match.
+ Riscv64Label loop, success;
+ __ Bind(&loop);
+ __ Beq(out, cls.AsRegister<XRegister>(), &success);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(
+ instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option);
+ __ Bnez(out, &loop);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ J(&done);
+ __ Bind(&success);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ // FIXME(riscv64): We currently have marking entrypoints for 29 registers.
+      // We need to either store the entrypoint for register `N` in entry `N-A`, where
+ // `A` can be up to 5 (Zero, RA, SP, GP, TP are not valid registers for
+ // marking), or define two more entrypoints, or request an additional temp
+ // from the register allocator instead of using a scratch register.
+ ScratchRegisterScope srs(GetAssembler());
+ Location tmp = Location::RegisterLocation(srs.AllocateXRegister());
+ // /* HeapReference<Class> */ tmp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, tmp, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // Do an exact check.
+ __ LoadConst32(out, 1);
+ __ Beq(tmp.AsRegister<XRegister>(), cls.AsRegister<XRegister>(), &done);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = tmp->component_type_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, tmp, component_offset, maybe_temp_loc, read_barrier_option);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqz(out, &done);
+ __ Loadhu(out, out, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
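+      // The primitive type is 0 (kPrimNot) only when the component is a reference type.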
+ __ Seqz(out, out);
+ break;
+ }
+
+ case TypeCheckKind::kArrayCheck: {
+ // No read barrier since the slow path will retry upon failure.
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (codegen_->GetScopedAllocator())
+ TypeCheckSlowPathRISCV64(instruction, /* is_fatal= */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ Bne(out, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved and interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, /* is_fatal= */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
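+      // The bitstring comparison leaves zero in `out` on a match; convert that to a boolean.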
+      __ Seqz(out, out);
+ break;
+ }
+ }
+
+ __ Bind(&done);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitIntConstant(HIntConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntConstant([[maybe_unused]] HIntConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+  // The trampoline uses the same calling convention as the dex calling convention, except
+  // that arg0/A0 contains the method_idx instead of the target Method*.
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ codegen_->GenerateInvokeUnresolvedRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+ HandleInvoke(instruction);
+ // Use T0 as the hidden argument for `art_quick_imt_conflict_trampoline`.
+ if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
+ instruction->GetLocations()->SetInAt(instruction->GetNumberOfArguments() - 1,
+ Location::RegisterLocation(T0));
+ } else {
+ instruction->GetLocations()->AddTemp(Location::RegisterLocation(T0));
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister temp = locations->GetTemp(0).AsRegister<XRegister>();
+ XRegister receiver = locations->InAt(0).AsRegister<XRegister>();
+ int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize);
+
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ Loadwu(temp, receiver, class_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ codegen_->MaybeUnpoisonHeapReference(temp);
+
+ // If we're compiling baseline, update the inline cache.
+ codegen_->MaybeGenerateInlineCacheCheck(instruction, temp);
+
+ // The register T0 is required to be used for the hidden argument in
+ // `art_quick_imt_conflict_trampoline`.
+ if (instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
+ instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
+ Location hidden_reg = instruction->GetLocations()->GetTemp(1);
+ // Load the resolved interface method in the hidden argument register T0.
+ DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>());
+ codegen_->LoadMethod(instruction->GetHiddenArgumentLoadKind(), hidden_reg, instruction);
+ }
+
+ __ Loadd(temp, temp, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value());
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetImtIndex(), kRiscv64PointerSize));
+ // temp = temp->GetImtEntryAt(method_offset);
+ __ Loadd(temp, temp, method_offset);
+ if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
+ // We pass the method from the IMT in case of a conflict. This will ensure
+ // we go into the runtime to resolve the actual method.
+ Location hidden_reg = instruction->GetLocations()->GetTemp(1);
+ DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>());
+ __ Mv(hidden_reg.AsRegister<XRegister>(), temp);
+ }
+ // RA = temp->GetEntryPoint();
+ __ Loadd(RA, temp, entry_point.Int32Value());
+
+ // RA();
+ __ Jalr(RA);
+ DCHECK(!codegen_->IsLeafMethod());
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void LocationsBuilderRISCV64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) {
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!instruction->IsStaticWithExplicitClinitCheck());
+
+ IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_);
+ if (intrinsic.TryDispatch(instruction)) {
+ return;
+ }
+
+ if (instruction->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorRiscv64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor);
+ } else {
+ HandleInvoke(instruction);
+ }
+}
+
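+// Emits the intrinsic implementation if the invoke was marked as intrinsified when building
+// its locations; returns false if the invoke must be compiled as a regular call.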
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorRISCV64 intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeStaticOrDirect(
+ HInvokeStaticOrDirect* instruction) {
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!instruction->IsStaticWithExplicitClinitCheck());
+
+ if (TryGenerateIntrinsicCode(instruction, codegen_)) {
+ return;
+ }
+
+ LocationSummary* locations = instruction->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ instruction, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+}
+
+void LocationsBuilderRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) {
+ IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_);
+ if (intrinsic.TryDispatch(instruction)) {
+ return;
+ }
+
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) {
+ if (TryGenerateIntrinsicCode(instruction, codegen_)) {
+ return;
+ }
+
+ codegen_->GenerateVirtualCall(instruction, instruction->GetLocations()->GetTemp(0));
+ DCHECK(!codegen_->IsLeafMethod());
+}
+
+void LocationsBuilderRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) {
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) {
+ codegen_->GenerateInvokePolymorphicCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) {
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) {
+ codegen_->GenerateInvokeCustomCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLessThan(HLessThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLessThan(HLessThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLoadClass(HLoadClass* instruction) {
+ HLoadClass::LoadKind load_kind = instruction->GetLoadKind();
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ DCHECK_EQ(DataType::Type::kReference, instruction->GetType());
+ DCHECK(loc.Equals(calling_convention.GetReturnLocation(DataType::Type::kReference)));
+ CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(instruction, loc, loc);
+ return;
+ }
+ DCHECK_EQ(instruction->NeedsAccessCheck(),
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage);
+
+ const bool requires_read_barrier = gUseReadBarrier && !instruction->IsInBootImage();
+ LocationSummary::CallKind call_kind = (instruction->NeedsEnvironment() || requires_read_barrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !instruction->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
+}
+
+// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
+// move.
+void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction)
+ NO_THREAD_SAFETY_ANALYSIS {
+ HLoadClass::LoadKind load_kind = instruction->GetLoadKind();
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
+ codegen_->GenerateLoadClassRuntimeCall(instruction);
+ return;
+ }
+ DCHECK_EQ(instruction->NeedsAccessCheck(),
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+ const ReadBarrierOption read_barrier_option =
+ instruction->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption();
+ bool generate_null_check = false;
+ switch (load_kind) {
+ case HLoadClass::LoadKind::kReferrersClass: {
+ DCHECK(!instruction->CanCallRuntime());
+ DCHECK(!instruction->MustGenerateClinitCheck());
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ XRegister current_method = locations->InAt(0).AsRegister<XRegister>();
+ GenerateGcRootFieldLoad(instruction,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
+ codegen_->GetCompilerOptions().IsBootImageExtension());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ codegen_->NewBootImageTypePatch(instruction->GetDexFile(), instruction->GetTypeIndex());
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ codegen_->NewBootImageTypePatch(
+ instruction->GetDexFile(), instruction->GetTypeIndex(), info_high);
+ codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageRelRo: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
+ codegen_->EmitPcRelativeLwuPlaceholder(info_low, out, out);
+ break;
+ }
+ case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ case HLoadClass::LoadKind::kBssEntryPackage: {
+ CodeGeneratorRISCV64::PcRelativePatchInfo* bss_info_high =
+ codegen_->NewTypeBssEntryPatch(instruction);
+ codegen_->EmitPcRelativeAuipcPlaceholder(bss_info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch(
+ instruction, bss_info_high);
+ GenerateGcRootFieldLoad(instruction,
+ out_loc,
+ out,
+ /* offset= */ kLinkTimeOffsetPlaceholderLow,
+ read_barrier_option,
+ &info_low->label);
+ generate_null_check = true;
+ break;
+ }
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ break;
+ }
+ case HLoadClass::LoadKind::kJitTableAddress:
+ __ Loadwu(out, codegen_->DeduplicateJitClassLiteral(instruction->GetDexFile(),
+ instruction->GetTypeIndex(),
+ instruction->GetClass()));
+ GenerateGcRootFieldLoad(instruction, out_loc, out, /* offset= */ 0, read_barrier_option);
+ break;
+ case HLoadClass::LoadKind::kRuntimeCall:
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+
+ if (generate_null_check || instruction->MustGenerateClinitCheck()) {
+ DCHECK(instruction->CanCallRuntime());
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(instruction, instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (generate_null_check) {
+ __ Beqz(out, slow_path->GetEntryLabel());
+ }
+ if (instruction->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
+ }
+}
+
+void LocationsBuilderRISCV64::VisitLoadException(HLoadException* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLoadException(HLoadException* instruction) {
+ XRegister out = instruction->GetLocations()->Out().AsRegister<XRegister>();
+ __ Loadwu(out, TR, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(instruction, loc, loc);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(instruction, loc, loc);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLoadString(HLoadString* instruction) {
+ HLoadString::LoadKind load_kind = instruction->GetLoadKind();
+ LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(DataType::Type::kReference, instruction->GetType());
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and marking to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
+ }
+}
+
+// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
+// move.
+void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction)
+ NO_THREAD_SAFETY_ANALYSIS {
+ HLoadString::LoadKind load_kind = instruction->GetLoadKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+
+ switch (load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
+ codegen_->GetCompilerOptions().IsBootImageExtension());
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewBootImageStringPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex());
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex(), info_high);
+ codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out);
+ return;
+ }
+ case HLoadString::LoadKind::kBootImageRelRo: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
+ codegen_->EmitPcRelativeLwuPlaceholder(info_low, out, out);
+ return;
+ }
+ case HLoadString::LoadKind::kBssEntry: {
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex());
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewStringBssEntryPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex(), info_high);
+ GenerateGcRootFieldLoad(instruction,
+ out_loc,
+ out,
+ /* offset= */ kLinkTimeOffsetPlaceholderLow,
+ GetCompilerReadBarrierOption(),
+ &info_low->label);
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadStringSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ __ Beqz(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ return;
+ }
+ case HLoadString::LoadKind::kJitTableAddress:
+ __ Loadwu(
+ out,
+ codegen_->DeduplicateJitStringLiteral(
+ instruction->GetDexFile(), instruction->GetStringIndex(), instruction->GetString()));
+ GenerateGcRootFieldLoad(instruction, out_loc, out, 0, GetCompilerReadBarrierOption());
+ return;
+ default:
+ break;
+ }
+
+ DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK(calling_convention.GetReturnLocation(DataType::Type::kReference).Equals(out_loc));
+ __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetStringIndex().index_);
+ codegen_->InvokeRuntime(kQuickResolveString, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+}
+
+void LocationsBuilderRISCV64::VisitLongConstant(HLongConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLongConstant(
+ [[maybe_unused]] HLongConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitMax(HMax* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMax(HMax* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) {
+ codegen_->GenerateMemoryBarrier(instruction->GetBarrierKind());
+}
+
+void LocationsBuilderRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitMin(HMin* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMin(HMin* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) {
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+ instruction,
+ instruction->GetDexPc());
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitMul(HMul* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMul(HMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ __ Mulw(locations->Out().AsRegister<XRegister>(),
+ locations->InAt(0).AsRegister<XRegister>(),
+ locations->InAt(1).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kInt64:
+ __ Mul(locations->Out().AsRegister<XRegister>(),
+ locations->InAt(0).AsRegister<XRegister>(),
+ locations->InAt(1).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ FMul(locations->Out().AsFpuRegister<FRegister>(),
+ locations->InAt(0).AsFpuRegister<FRegister>(),
+ locations->InAt(1).AsFpuRegister<FRegister>(),
+ instruction->GetResultType());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitNeg(HNeg* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNeg(HNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ __ NegW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kInt64:
+ __ Neg(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ FNeg(locations->Out().AsFpuRegister<FRegister>(),
+ locations->InAt(0).AsFpuRegister<FRegister>(),
+ instruction->GetResultType());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitNewArray(HNewArray* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNewArray(HNewArray* instruction) {
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+ DCHECK(!codegen_->IsLeafMethod());
+}
+
+void LocationsBuilderRISCV64::VisitNewInstance(HNewInstance* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNewInstance(HNewInstance* instruction) {
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
+void LocationsBuilderRISCV64::VisitNop(HNop* instruction) {
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNop([[maybe_unused]] HNop* instruction) {
+ // The environment recording already happened in CodeGenerator::Compile.
+}
+
+void LocationsBuilderRISCV64::VisitNot(HNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNot(HNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ __ Not(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitNotEqual(HNotEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNotEqual(HNotEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitNullConstant(HNullConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNullConstant(
+ [[maybe_unused]] HNullConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitNullCheck(HNullCheck* instruction) {
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNullCheck(HNullCheck* instruction) {
+ codegen_->GenerateNullCheck(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitOr(HOr* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitOr(HOr* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) {
+ int32_t lower_bound = instruction->GetStartValue();
+ uint32_t num_entries = instruction->GetNumEntries();
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister value = locations->InAt(0).AsRegister<XRegister>();
+ HBasicBlock* switch_block = instruction->GetBlock();
+ HBasicBlock* default_block = instruction->GetDefaultBlock();
+
+ // Prepare a temporary register and an adjusted zero-based value.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister temp = srs.AllocateXRegister();
+ XRegister adjusted = value;
+ if (lower_bound != 0) {
+ adjusted = temp;
+ __ AddConst32(temp, value, -lower_bound);
+ }
+
+ // Jump to the default block if the index is out of the packed switch value range.
+ // Note: We could save one instruction for `num_entries == 1` with BNEZ but the
+ // `HInstructionBuilder` transforms that case to an `HIf`, so let's keep the code simple.
+ CHECK_NE(num_entries, 0u); // `HInstructionBuilder` creates a `HGoto` for empty packed-switch.
+ {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister temp2 = srs2.AllocateXRegister();
+ __ LoadConst32(temp2, num_entries);
+ __ Bgeu(adjusted, temp2, codegen_->GetLabelOf(default_block)); // Can clobber `TMP` if taken.
+ }
+
+ if (num_entries >= kPackedSwitchCompareJumpThreshold) {
+ GenTableBasedPackedSwitch(adjusted, temp, num_entries, switch_block);
+ } else {
+ GenPackedSwitchWithCompares(adjusted, temp, num_entries, switch_block);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitParallelMove(HParallelMove* instruction) {
+ if (instruction->GetNext()->IsSuspendCheck() &&
+ instruction->GetBlock()->GetLoopInformation() != nullptr) {
+ HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
+ // The back edge will generate the suspend check.
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
+ }
+
+ codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitParameterValue(HParameterValue* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
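+  // Stack parameters live in the caller's frame; add this method's frame size so that the
+  // offset is relative to the current SP.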
+ if (location.IsStackSlot()) {
+ location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ } else if (location.IsDoubleStackSlot()) {
+ location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ }
+ locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitParameterValue(
+ [[maybe_unused]] HParameterValue* instruction) {
+ // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderRISCV64::VisitPhi(HPhi* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+ locations->SetInAt(i, Location::Any());
+ }
+ locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitPhi([[maybe_unused]] HPhi* instruction) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderRISCV64::VisitRem(HRem* instruction) {
+ DataType::Type type = instruction->GetResultType();
+ LocationSummary::CallKind call_kind =
+ DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(type));
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected rem type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitRem(HRem* instruction) {
+ DataType::Type type = instruction->GetType();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateDivRemIntegral(instruction);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ QuickEntrypointEnum entrypoint =
+ (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
+ if (type == DataType::Type::kFloat32) {
+ CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+ } else {
+ CheckEntrypointTypes<kQuickFmod, double, double, double>();
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected rem type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitReturn(HReturn* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DataType::Type return_type = instruction->InputAt(0)->GetType();
+ DCHECK_NE(return_type, DataType::Type::kVoid);
+ locations->SetInAt(0, Riscv64ReturnLocation(return_type));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitReturn(HReturn* instruction) {
+ if (GetGraph()->IsCompilingOsr()) {
+ // To simplify callers of an OSR method, we put a floating point return value
+ // in both floating point and core return registers.
+ switch (instruction->InputAt(0)->GetType()) {
+ case DataType::Type::kFloat32:
+ __ FMvXW(A0, FA0);
+ break;
+ case DataType::Type::kFloat64:
+ __ FMvXD(A0, FA0);
+ break;
+ default:
+ break;
+ }
+ }
+ codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderRISCV64::VisitReturnVoid(HReturnVoid* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
+ codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderRISCV64::VisitRor(HRor* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitRor(HRor* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitShl(HShl* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitShl(HShl* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitShr(HShr* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitShr(HShr* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
+}
+
+void LocationsBuilderRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(A0));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ LoadConst32(A0, instruction->GetFormat()->GetValue());
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldGet(
+ HUnresolvedInstanceFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldGet(
+ HUnresolvedInstanceFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldSet(
+ HUnresolvedInstanceFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldSet(
+ HUnresolvedInstanceFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldGet(
+ HUnresolvedStaticFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldGet(
+ HUnresolvedStaticFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldSet(
+ HUnresolvedStaticFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldSet(
+ HUnresolvedStaticFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitSelect(HSelect* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitSelect(HSelect* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitSub(HSub* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitSub(HSub* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+  // In the suspend check slow path, usually there are no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() ? RegisterSet::AllFpu() :
+ RegisterSet::Empty());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ if (block->GetLoopInformation() != nullptr) {
+ DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+ // The back edge will generate the suspend check.
+ return;
+ }
+ if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+ // The goto will generate the suspend check.
+ return;
+ }
+ GenerateSuspendCheck(instruction, nullptr);
+}
+
+void LocationsBuilderRISCV64::VisitThrow(HThrow* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitThrow(HThrow* instruction) {
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+void LocationsBuilderRISCV64::VisitTryBoundary(HTryBoundary* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitTryBoundary(HTryBoundary* instruction) {
+ HBasicBlock* successor = instruction->GetNormalFlowSuccessor();
+ if (!successor->IsExitBlock()) {
+ HandleGoto(instruction, successor);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitTypeConversion(HTypeConversion* instruction) {
+ DataType::Type input_type = instruction->GetInputType();
+ DataType::Type result_type = instruction->GetResultType();
+ DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+ << input_type << " -> " << result_type;
+
+ if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
+ (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
+ LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
+ }
+
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ if (DataType::IsFloatingPointType(input_type)) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+
+ if (DataType::IsFloatingPointType(result_type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitTypeConversion(HTypeConversion* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DataType::Type result_type = instruction->GetResultType();
+ DataType::Type input_type = instruction->GetInputType();
+
+ DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+ << input_type << " -> " << result_type;
+
+ if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
+ XRegister dst = locations->Out().AsRegister<XRegister>();
+ XRegister src = locations->InAt(0).AsRegister<XRegister>();
+ switch (result_type) {
+ case DataType::Type::kUint8:
+ __ Andi(dst, src, 0xFF);
+ break;
+ case DataType::Type::kInt8:
+ __ SextB(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ __ ZextH(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ __ SextH(dst, src);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+        // Sign-extend the 32-bit value into bits 32 through 63 for int-to-long and long-to-int
+        // conversions, except when the input and output registers are the same and we are not
+        // converting a long to a narrower type. In that case, do nothing.
+ if ((input_type == DataType::Type::kInt64) || (dst != src)) {
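+          // ADDIW with a zero immediate is the canonical sign-extension of the low 32 bits.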
+ __ Addiw(dst, src, 0);
+ }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ UNREACHABLE();
+ }
+ } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ XRegister src = locations->InAt(0).AsRegister<XRegister>();
+ if (input_type == DataType::Type::kInt64) {
+ if (result_type == DataType::Type::kFloat32) {
+ __ FCvtSL(dst, src, FPRoundingMode::kRNE);
+ } else {
+ __ FCvtDL(dst, src, FPRoundingMode::kRNE);
+ }
+ } else {
+ if (result_type == DataType::Type::kFloat32) {
+ __ FCvtSW(dst, src, FPRoundingMode::kRNE);
+ } else {
+ __ FCvtDW(dst, src); // No rounding.
+ }
+ }
+ } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
+ CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
+ XRegister dst = locations->Out().AsRegister<XRegister>();
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
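+    // Java narrowing conversions from floating-point to integral types truncate toward zero,
+    // hence the RTZ rounding mode.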
+ if (result_type == DataType::Type::kInt64) {
+ if (input_type == DataType::Type::kFloat32) {
+ __ FCvtLS(dst, src, FPRoundingMode::kRTZ);
+ } else {
+ __ FCvtLD(dst, src, FPRoundingMode::kRTZ);
+ }
+ } else {
+ if (input_type == DataType::Type::kFloat32) {
+ __ FCvtWS(dst, src, FPRoundingMode::kRTZ);
+ } else {
+ __ FCvtWD(dst, src, FPRoundingMode::kRTZ);
+ }
+ }
+ } else if (DataType::IsFloatingPointType(result_type) &&
+ DataType::IsFloatingPointType(input_type)) {
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ if (result_type == DataType::Type::kFloat32) {
+ __ FCvtSD(dst, src);
+ } else {
+ __ FCvtDS(dst, src);
+ }
+ } else {
+ LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+ << " to " << result_type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitUShr(HUShr* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUShr(HUShr* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitXor(HXor* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitXor(HXor* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecReduce(HVecReduce* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecReduce(HVecReduce* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecCnv(HVecCnv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecCnv(HVecCnv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecNeg(HVecNeg* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecNeg(HVecNeg* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAbs(HVecAbs* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAbs(HVecAbs* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecNot(HVecNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecNot(HVecNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAdd(HVecAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAdd(HVecAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSub(HVecSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSub(HVecSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMul(HVecMul* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMul(HVecMul* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecDiv(HVecDiv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecDiv(HVecDiv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMin(HVecMin* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMin(HVecMin* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMax(HVecMax* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMax(HVecMax* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAnd(HVecAnd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAnd(HVecAnd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAndNot(HVecAndNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAndNot(HVecAndNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecOr(HVecOr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecOr(HVecOr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecXor(HVecXor* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecXor(HVecXor* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecShl(HVecShl* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecShl(HVecShl* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecShr(HVecShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecShr(HVecShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecUShr(HVecUShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecUShr(HVecUShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMultiplyAccumulate(
+ HVecMultiplyAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecDotProd(HVecDotProd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecDotProd(HVecDotProd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecLoad(HVecLoad* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecLoad(HVecLoad* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecStore(HVecStore* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecStore(HVecStore* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecCondition(HVecCondition* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecCondition(HVecCondition* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredNot(HVecPredNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredNot(HVecPredNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+namespace detail {
+
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
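+// Flags indexed by the `Intrinsics` enum; the leading entry corresponds to `kNone`.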
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+
+} // namespace detail
+
+CodeGeneratorRISCV64::CodeGeneratorRISCV64(HGraph* graph,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats)
+ : CodeGenerator(graph,
+ kNumberOfXRegisters,
+ kNumberOfFRegisters,
+ /*number_of_register_pairs=*/ 0u,
+ ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)),
+ ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
+ compiler_options,
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures()),
+ location_builder_(graph, this),
+ instruction_visitor_(graph, this),
+ block_labels_(nullptr),
+ move_resolver_(graph->GetAllocator(), this),
+ uint32_literals_(std::less<uint32_t>(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ uint64_literals_(std::less<uint64_t>(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(StringReferenceValueComparator(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(TypeReferenceValueComparator(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+ // Always mark the RA register to be saved.
+ AddAllocatedRegister(Location::RegisterLocation(RA));
+}
+
+void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) {
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister method = is_frame_entry ? kArtMethodRegister : srs.AllocateXRegister();
+ if (!is_frame_entry) {
+ __ Loadd(method, SP, 0);
+ }
+ XRegister counter = srs.AllocateXRegister();
+ __ Loadhu(counter, method, ArtMethod::HotnessCountOffset().Int32Value());
+ Riscv64Label done;
+ DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
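+ // Skip the decrement once the counter reaches zero, so that it saturates instead of wrapping.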
+ __ Beqz(counter, &done); // Can clobber `TMP` if taken.
+ __ Addi(counter, counter, -1);
+ // We may not have another scratch register available for `Storeh()`,
+ // so we must use the `Sh()` function directly.
+ static_assert(IsInt<12>(ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Sh(counter, method, ArtMethod::HotnessCountOffset().Int32Value());
+ __ Bind(&done);
+ }
+
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64();
+ AddSlowPath(slow_path);
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ DCHECK(!HasEmptyFrame());
+ uint64_t address = reinterpret_cast64<uint64_t>(info);
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ LoadConst64(tmp, address);
+ XRegister counter = srs.AllocateXRegister();
+ __ Loadhu(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value());
+ __ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken.
+ __ Addi(counter, counter, -1);
+ // We do not have another scratch register available for `Storeh()`,
+ // so we must use the `Sh()` function directly.
+ static_assert(IsInt<12>(ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Sh(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value());
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+bool CodeGeneratorRISCV64::CanUseImplicitSuspendCheck() const {
+ // TODO(riscv64): Implement implicit suspend checks to reduce code size.
+ return false;
+}
+
+void CodeGeneratorRISCV64::GenerateMemoryBarrier(MemBarrierKind kind) {
+ switch (kind) {
+ case MemBarrierKind::kAnyAny:
+ __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceRead | kFenceWrite);
+ break;
+ case MemBarrierKind::kAnyStore:
+ __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceWrite);
+ break;
+ case MemBarrierKind::kLoadAny:
+ __ Fence(/*pred=*/ kFenceRead, /*succ=*/ kFenceRead | kFenceWrite);
+ break;
+ case MemBarrierKind::kStoreStore:
+ __ Fence(/*pred=*/ kFenceWrite, /*succ=*/ kFenceWrite);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateFrameEntry() {
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ Riscv64Label resolution;
+ Riscv64Label memory_barrier;
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a clinit check before re-entering this code.
+ __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value());
+
+ // We shall load the full 32-bit status word with sign-extension and compare as unsigned
+ // to sign-extended shifted status values. This yields the same comparison as loading and
+ // materializing unsigned but the constant is materialized with a single LUI instruction.
+ __ Loadw(tmp, tmp2, mirror::Class::StatusOffset().SizeValue()); // Sign-extended.
+
+ // Check if we're visibly initialized.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>());
+ __ Bgeu(tmp, tmp2, &frame_entry_label_); // Can clobber `TMP` if taken.
+
+ // Check if we're initialized and jump to code that does a memory barrier if so.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitialized>());
+ __ Bgeu(tmp, tmp2, &memory_barrier); // Can clobber `TMP` if taken.
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitializing>());
+ __ Bltu(tmp, tmp2, &resolution); // Can clobber `TMP` if taken.
+
+ __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value());
+ __ Loadw(tmp, tmp2, mirror::Class::ClinitThreadIdOffset().Int32Value());
+ __ Loadw(tmp2, TR, Thread::TidOffset<kRiscv64PointerSize>().Int32Value());
+ __ Beq(tmp, tmp2, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset64 entrypoint_offset =
+ GetThreadOffset<kRiscv64PointerSize>(kQuickQuickResolutionTrampoline);
+ __ Loadd(tmp, TR, entrypoint_offset.Int32Value());
+ __ Jr(tmp);
+
+ __ Bind(&memory_barrier);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ __ Bind(&frame_entry_label_);
+
+ bool do_overflow_check =
+ FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kRiscv64) || !IsLeafMethod();
+
+ if (do_overflow_check) {
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ __ Loadw(
+ Zero, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kRiscv64)));
+ RecordPcInfo(nullptr, 0);
+ }
+
+ if (!HasEmptyFrame()) {
+ // Make sure the frame size isn't unreasonably large.
+ if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kRiscv64)) {
+ LOG(FATAL) << "Stack frame larger than "
+ << GetStackOverflowReservedBytes(InstructionSet::kRiscv64) << " bytes";
+ }
+
+ // Spill callee-saved registers.
+
+ uint32_t frame_size = GetFrameSize();
+
+ IncreaseFrame(frame_size);
+
+ uint32_t offset = frame_size;
+ for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) {
+ --i;
+ XRegister reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ Stored(reg, SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Core(reg), offset);
+ }
+ }
+
+ for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) {
+ --i;
+ FRegister reg = kFpuCalleeSaves[i];
+ if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ FStored(reg, SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(reg), offset);
+ }
+ }
+
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ Stored(kArtMethodRegister, SP, 0);
+ }
+
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ __ Storew(Zero, SP, GetStackOffsetOfShouldDeoptimizeFlag());
+ }
+ }
+ MaybeIncrementHotness(/*is_frame_entry=*/ true);
+}
+
+void CodeGeneratorRISCV64::GenerateFrameExit() {
+ __ cfi().RememberState();
+
+ if (!HasEmptyFrame()) {
+ // Restore callee-saved registers.
+
+ // For better instruction scheduling, restore RA before other registers.
+ uint32_t offset = GetFrameSize();
+ for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) {
+ --i;
+ XRegister reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ Loadd(reg, SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Core(reg));
+ }
+ }
+
+ for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) {
+ --i;
+ FRegister reg = kFpuCalleeSaves[i];
+ if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ FLoadd(reg, SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Fp(reg));
+ }
+ }
+
+ DecreaseFrame(GetFrameSize());
+ }
+
+ __ Jr(RA);
+
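+ // Code emitted after the return (other blocks and slow paths) still has the frame,
+ // so restore the CFI state remembered at the start of the frame exit.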
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(GetFrameSize());
+}
+
+void CodeGeneratorRISCV64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); }
+
+void CodeGeneratorRISCV64::MoveConstant(Location destination, int32_t value) {
+ DCHECK(destination.IsRegister());
+ __ LoadConst32(destination.AsRegister<XRegister>(), value);
+}
+
+void CodeGeneratorRISCV64::MoveLocation(Location destination,
+ Location source,
+ DataType::Type dst_type) {
+ if (source.Equals(destination)) {
+ return;
+ }
+
+ // A valid move type can always be inferred from the destination and source locations.
+ // When moving from and to a register, the `dst_type` can be used to generate 32-bit instead
+ // of 64-bit moves but it's generally OK to use 64-bit moves for 32-bit values in registers.
+ bool unspecified_type = (dst_type == DataType::Type::kVoid);
+ // TODO(riscv64): Is the destination type known in all cases?
+ // TODO(riscv64): Can unspecified `dst_type` move 32-bit GPR to FPR without NaN-boxing?
+ CHECK(!unspecified_type);
+
+ if (destination.IsRegister() || destination.IsFpuRegister()) {
+ if (unspecified_type) {
+ HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
+ if (source.IsStackSlot() ||
+ (src_cst != nullptr &&
+ (src_cst->IsIntConstant() || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) {
+ // For stack slots and 32-bit constants, a 32-bit type is appropriate.
+ dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
+ } else {
+ // If the source is a double stack slot or a 64-bit constant, a 64-bit type
+ // is appropriate. Otherwise the source is a register, and since the type has
+ // not been specified, we choose a 64-bit type to force a 64-bit move.
+ dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
+ }
+ }
+ DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
+ (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
+
+ if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
+ // Move to GPR/FPR from stack
+ if (DataType::IsFloatingPointType(dst_type)) {
+ if (DataType::Is64BitType(dst_type)) {
+ __ FLoadd(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
+ } else {
+ __ FLoadw(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
+ }
+ } else {
+ if (DataType::Is64BitType(dst_type)) {
+ __ Loadd(destination.AsRegister<XRegister>(), SP, source.GetStackIndex());
+ } else if (dst_type == DataType::Type::kReference) {
+ __ Loadwu(destination.AsRegister<XRegister>(), SP, source.GetStackIndex());
+ } else {
+ __ Loadw(destination.AsRegister<XRegister>(), SP, source.GetStackIndex());
+ }
+ }
+ } else if (source.IsConstant()) {
+ // Move to GPR/FPR from constant
+ // TODO(riscv64): Consider using literals for difficult-to-materialize 64-bit constants.
+ int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister gpr = DataType::IsFloatingPointType(dst_type)
+ ? srs.AllocateXRegister()
+ : destination.AsRegister<XRegister>();
+ if (DataType::IsFloatingPointType(dst_type) && value == 0) {
+ gpr = Zero; // Note: The scratch register allocated above shall not be used.
+ } else {
+ // Note: For `float` we load the sign-extended value here as it can sometimes yield
+ // a shorter instruction sequence. The higher 32 bits shall be ignored during the
+ // transfer to FP reg and the result shall be correctly NaN-boxed.
+ __ LoadConst64(gpr, value);
+ }
+ if (dst_type == DataType::Type::kFloat32) {
+ __ FMvWX(destination.AsFpuRegister<FRegister>(), gpr);
+ } else if (dst_type == DataType::Type::kFloat64) {
+ __ FMvDX(destination.AsFpuRegister<FRegister>(), gpr);
+ }
+ } else if (source.IsRegister()) {
+ if (destination.IsRegister()) {
+ // Move to GPR from GPR
+ __ Mv(destination.AsRegister<XRegister>(), source.AsRegister<XRegister>());
+ } else {
+ DCHECK(destination.IsFpuRegister());
+ if (DataType::Is64BitType(dst_type)) {
+ __ FMvDX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>());
+ } else {
+ __ FMvWX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>());
+ }
+ }
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsFpuRegister()) {
+ if (GetGraph()->HasSIMD()) {
+ LOG(FATAL) << "Vector extension is unsupported";
+ UNREACHABLE();
+ } else {
+ // Move to FPR from FPR
+ if (dst_type == DataType::Type::kFloat32) {
+ __ FMvS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ } else {
+ DCHECK_EQ(dst_type, DataType::Type::kFloat64);
+ __ FMvD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ }
+ }
+ } else {
+ DCHECK(destination.IsRegister());
+ if (DataType::Is64BitType(dst_type)) {
+ __ FMvXD(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>());
+ } else {
+ __ FMvXW(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>());
+ }
+ }
+ }
+ } else if (destination.IsSIMDStackSlot()) {
+ LOG(FATAL) << "SIMD is unsupported";
+ UNREACHABLE();
+ } else { // The destination is not a register. It must be a stack slot.
+ DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
+ if (source.IsRegister() || source.IsFpuRegister()) {
+ if (unspecified_type) {
+ if (source.IsRegister()) {
+ dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
+ } else {
+ dst_type =
+ destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
+ }
+ }
+ DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
+ (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
+ // Move to stack from GPR/FPR
+ if (DataType::Is64BitType(dst_type)) {
+ if (source.IsRegister()) {
+ __ Stored(source.AsRegister<XRegister>(), SP, destination.GetStackIndex());
+ } else {
+ __ FStored(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex());
+ }
+ } else {
+ if (source.IsRegister()) {
+ __ Storew(source.AsRegister<XRegister>(), SP, destination.GetStackIndex());
+ } else {
+ __ FStorew(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex());
+ }
+ }
+ } else if (source.IsConstant()) {
+ // Move to stack from constant
+ int64_t value = GetInt64ValueOf(source.GetConstant());
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister gpr = (value != 0) ? srs.AllocateXRegister() : Zero;
+ if (value != 0) {
+ __ LoadConst64(gpr, value);
+ }
+ if (destination.IsStackSlot()) {
+ __ Storew(gpr, SP, destination.GetStackIndex());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ Stored(gpr, SP, destination.GetStackIndex());
+ }
+ } else {
+ DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
+ DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
+ // Move to stack from stack
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ if (destination.IsStackSlot()) {
+ __ Loadw(tmp, SP, source.GetStackIndex());
+ __ Storew(tmp, SP, destination.GetStackIndex());
+ } else {
+ __ Loadd(tmp, SP, source.GetStackIndex());
+ __ Stored(tmp, SP, destination.GetStackIndex());
+ }
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::AddLocationAsTemp(Location location, LocationSummary* locations) {
+ if (location.IsRegister()) {
+ locations->AddTemp(location);
+ } else {
+ UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+ }
+}
+
+void CodeGeneratorRISCV64::SetupBlockedRegisters() const {
+ // ZERO, GP, SP, RA, TP and TR(S1) are reserved and can't be allocated.
+ blocked_core_registers_[Zero] = true;
+ blocked_core_registers_[GP] = true;
+ blocked_core_registers_[SP] = true;
+ blocked_core_registers_[RA] = true;
+ blocked_core_registers_[TP] = true;
+ blocked_core_registers_[TR] = true; // ART Thread register.
+
+ // TMP(T6), TMP2(T5) and FTMP(FT11) are used as temporary/scratch registers.
+ blocked_core_registers_[TMP] = true;
+ blocked_core_registers_[TMP2] = true;
+ blocked_fpu_registers_[FTMP] = true;
+
+ if (GetGraph()->IsDebuggable()) {
+ // Stubs do not save callee-save floating point registers. If the graph
+ // is debuggable, we need to deal with these registers differently. For
+ // now, just block them.
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+ }
+ }
+}
+
+size_t CodeGeneratorRISCV64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ Stored(XRegister(reg_id), SP, stack_index);
+ return kRiscv64DoublewordSize;
+}
+
+size_t CodeGeneratorRISCV64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ Loadd(XRegister(reg_id), SP, stack_index);
+ return kRiscv64DoublewordSize;
+}
+
+size_t CodeGeneratorRISCV64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ if (GetGraph()->HasSIMD()) {
+ // TODO(riscv64): RISC-V vector extension.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ UNREACHABLE();
+ }
+ __ FStored(FRegister(reg_id), SP, stack_index);
+ return kRiscv64FloatRegSizeInBytes;
+}
+
+size_t CodeGeneratorRISCV64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ if (GetGraph()->HasSIMD()) {
+ // TODO(riscv64): RISC-V vector extension.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ UNREACHABLE();
+ }
+ __ FLoadd(FRegister(reg_id), SP, stack_index);
+ return kRiscv64FloatRegSizeInBytes;
+}
+
+void CodeGeneratorRISCV64::DumpCoreRegister(std::ostream& stream, int reg) const {
+ stream << XRegister(reg);
+}
+
+void CodeGeneratorRISCV64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+ stream << FRegister(reg);
+}
+
+void CodeGeneratorRISCV64::Finalize() {
+ // Ensure that we fix up branches and literal loads and emit the literal pool.
+ __ FinalizeCode();
+
+ // Adjust native pc offsets in stack maps.
+ StackMapStream* stack_map_stream = GetStackMapStream();
+ for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
+ uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
+ uint32_t new_position = __ GetAdjustedPosition(old_position);
+ DCHECK_GE(new_position, old_position);
+ stack_map_stream->SetStackMapNativePcOffset(i, new_position);
+ }
+
+ // Adjust pc offsets for the disassembly information.
+ if (disasm_info_ != nullptr) {
+ GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+ frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+ frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+ for (auto& entry : *disasm_info_->GetInstructionIntervals()) {
+ entry.second.start = __ GetAdjustedPosition(entry.second.start);
+ entry.second.end = __ GetAdjustedPosition(entry.second.end);
+ }
+ for (auto& entry : *disasm_info_->GetSlowPathIntervals()) {
+ entry.code_interval.start = __ GetAdjustedPosition(entry.code_interval.start);
+ entry.code_interval.end = __ GetAdjustedPosition(entry.code_interval.end);
+ }
+ }
+}
+
+// Generate code to invoke a runtime entry point.
+void CodeGeneratorRISCV64::InvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+
+ ThreadOffset64 entrypoint_offset = GetThreadOffset<kRiscv64PointerSize>(entrypoint);
+
+ // TODO(riscv64): Reduce code size for AOT by using shared trampolines for slow path
+ // runtime calls across the entire oat file.
+ __ Loadd(RA, TR, entrypoint_offset.Int32Value());
+ __ Jalr(RA);
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
+}
+
+// Generate code to invoke a runtime entry point, but do not record
+// PC-related information in a stack map.
+void CodeGeneratorRISCV64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ Loadd(RA, TR, entry_point_offset);
+ __ Jalr(RA);
+}
+
+void CodeGeneratorRISCV64::IncreaseFrame(size_t adjustment) {
+ int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment);
+ __ AddConst64(SP, SP, -adjustment32);
+ GetAssembler()->cfi().AdjustCFAOffset(adjustment32);
+}
+
+void CodeGeneratorRISCV64::DecreaseFrame(size_t adjustment) {
+ int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment);
+ __ AddConst64(SP, SP, adjustment32);
+ GetAssembler()->cfi().AdjustCFAOffset(-adjustment32);
+}
+
+void CodeGeneratorRISCV64::GenerateNop() {
+ __ Nop();
+}
+
+void CodeGeneratorRISCV64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+ Location obj = instruction->GetLocations()->InAt(0);
+
+ __ Lw(Zero, obj.AsRegister<XRegister>(), 0);
+ RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void CodeGeneratorRISCV64::GenerateExplicitNullCheck(HNullCheck* instruction) {
+ SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathRISCV64(instruction);
+ AddSlowPath(slow_path);
+
+ Location obj = instruction->GetLocations()->InAt(0);
+
+ __ Beqz(obj.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+}
+
+HLoadString::LoadKind CodeGeneratorRISCV64::GetSupportedLoadStringKind(
+ HLoadString::LoadKind desired_string_load_kind) {
+ switch (desired_string_load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageRelRo:
+ case HLoadString::LoadKind::kBssEntry:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
+ case HLoadString::LoadKind::kJitTableAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadString::LoadKind::kRuntimeCall:
+ break;
+ }
+ return desired_string_load_kind;
+}
+
+HLoadClass::LoadKind CodeGeneratorRISCV64::GetSupportedLoadClassKind(
+ HLoadClass::LoadKind desired_class_load_kind) {
+ switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ case HLoadClass::LoadKind::kReferrersClass:
+ break;
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadClass::LoadKind::kBootImageRelRo:
+ case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ case HLoadClass::LoadKind::kBssEntryPackage:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
+ case HLoadClass::LoadKind::kJitTableAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadClass::LoadKind::kRuntimeCall:
+ break;
+ }
+ return desired_class_load_kind;
+}
+
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorRISCV64::GetSupportedInvokeStaticOrDirectDispatch(
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) {
+ UNUSED(method);
+ // On RISCV64 we support all dispatch types.
+ return desired_dispatch_info;
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageIntrinsicPatch(
+ uint32_t intrinsic_data, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_other_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_other_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageMethodPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewMethodBssEntryPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageTypePatch(
+ const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageJniEntrypointPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, info_high, &boot_image_jni_entrypoint_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewTypeBssEntryPatch(
+ HLoadClass* load_class,
+ const PcRelativePatchInfo* info_high) {
+ const DexFile& dex_file = load_class->GetDexFile();
+ dex::TypeIndex type_index = load_class->GetTypeIndex();
+ ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
+ switch (load_class->GetLoadKind()) {
+ case HLoadClass::LoadKind::kBssEntry:
+ patches = &type_bss_entry_patches_;
+ break;
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ patches = &public_type_bss_entry_patches_;
+ break;
+ case HLoadClass::LoadKind::kBssEntryPackage:
+ patches = &package_type_bss_entry_patches_;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
+ UNREACHABLE();
+ }
+ return NewPcRelativePatch(&dex_file, type_index.index_, info_high, patches);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageStringPatch(
+ const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &boot_image_string_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewStringBssEntryPatch(
+ const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewPcRelativePatch(
+ const DexFile* dex_file,
+ uint32_t offset_or_index,
+ const PcRelativePatchInfo* info_high,
+ ArenaDeque<PcRelativePatchInfo>* patches) {
+ patches->emplace_back(dex_file, offset_or_index, info_high);
+ return &patches->back();
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateUint32Literal(uint32_t value) {
+ return uint32_literals_.GetOrCreate(value,
+ [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateUint64Literal(uint64_t value) {
+ return uint64_literals_.GetOrCreate(value,
+ [this, value]() { return __ NewLiteral<uint64_t>(value); });
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateBootImageAddressLiteral(uint64_t address) {
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateJitStringLiteral(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle) {
+ ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
+ return jit_string_patches_.GetOrCreate(
+ StringReference(&dex_file, string_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); });
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateJitClassLiteral(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle) {
+ ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
+ return jit_class_patches_.GetOrCreate(
+ TypeReference(&dex_file, type_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); });
+}
+
+void CodeGeneratorRISCV64::PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const Literal* literal,
+ uint64_t index_in_table) const {
+ uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel());
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+ reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
+void CodeGeneratorRISCV64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+ for (const auto& entry : jit_string_patches_) {
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ uint64_t index_in_table = GetJitStringRootIndex(string_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+ for (const auto& entry : jit_class_patches_) {
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ uint64_t index_in_table = GetJitClassRootIndex(type_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high,
+ XRegister out) {
+ DCHECK(info_high->pc_insn_label == &info_high->label);
+ __ Bind(&info_high->label);
+ __ Auipc(out, /*imm20=*/ kLinkTimeOffsetPlaceholderHigh);
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low,
+ XRegister rd,
+ XRegister rs1) {
+ DCHECK(info_low->pc_insn_label != &info_low->label);
+ __ Bind(&info_low->label);
+ __ Addi(rd, rs1, /*imm12=*/ kLinkTimeOffsetPlaceholderLow);
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low,
+ XRegister rd,
+ XRegister rs1) {
+ DCHECK(info_low->pc_insn_label != &info_low->label);
+ __ Bind(&info_low->label);
+ __ Lwu(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow);
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low,
+ XRegister rd,
+ XRegister rs1) {
+ DCHECK(info_low->pc_insn_label != &info_low->label);
+ __ Bind(&info_low->label);
+ __ Ld(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow);
+}
+
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorRISCV64::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
+ for (const PcRelativePatchInfo& info : infos) {
+ linker_patches->push_back(Factory(__ GetLabelLocation(&info.label),
+ info.target_dex_file,
+ __ GetLabelLocation(info.pc_insn_label),
+ info.offset_or_index));
+ }
+}
+
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
+void CodeGeneratorRISCV64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
+ DCHECK(linker_patches->empty());
+ size_t size =
+ boot_image_method_patches_.size() +
+ method_bss_entry_patches_.size() +
+ boot_image_type_patches_.size() +
+ type_bss_entry_patches_.size() +
+ public_type_bss_entry_patches_.size() +
+ package_type_bss_entry_patches_.size() +
+ boot_image_string_patches_.size() +
+ string_bss_entry_patches_.size() +
+ boot_image_jni_entrypoint_patches_.size() +
+ boot_image_other_patches_.size();
+ linker_patches->reserve(size);
+ if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ boot_image_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ boot_image_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ boot_image_string_patches_, linker_patches);
+ } else {
+ DCHECK(boot_image_method_patches_.empty());
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ }
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_other_patches_, linker_patches);
+ } else {
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_other_patches_, linker_patches);
+ }
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
+ public_type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
+ package_type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+ boot_image_jni_entrypoint_patches_, linker_patches);
+ DCHECK_EQ(size, linker_patches->size());
+}
+
+void CodeGeneratorRISCV64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
+ switch (load_kind) {
+ case MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
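+ // Materialize the method address with an AUIPC + ADDI pair that is patched at link time.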
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
+ EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>());
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), info_high);
+ EmitPcRelativeAddiPlaceholder(
+ info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>());
+ break;
+ }
+ case MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>());
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ EmitPcRelativeLwuPlaceholder(
+ info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>());
+ break;
+ }
+ case MethodLoadKind::kBssEntry: {
+ PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(invoke->GetMethodReference());
+ EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>());
+ PcRelativePatchInfo* info_low =
+ NewMethodBssEntryPatch(invoke->GetMethodReference(), info_high);
+ EmitPcRelativeLdPlaceholder(
+ info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>());
+ break;
+ }
+ case MethodLoadKind::kJitDirectAddress: {
+ __ LoadConst64(temp.AsRegister<XRegister>(),
+ reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()));
+ break;
+ }
+ case MethodLoadKind::kRuntimeCall: {
+ // Test situation, don't do anything.
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Load kind should have already been handled " << load_kind;
+ UNREACHABLE();
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp,
+ SlowPathCode* slow_path) {
+ // All registers are assumed to be correctly set up per the calling convention.
+ Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
+
+ switch (invoke->GetMethodLoadKind()) {
+ case MethodLoadKind::kStringInit: {
+ // temp = thread->string_init_entrypoint
+ uint32_t offset =
+ GetThreadOffset<kRiscv64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+ __ Loadd(temp.AsRegister<XRegister>(), TR, offset);
+ break;
+ }
+ case MethodLoadKind::kRecursive:
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
+ break;
+ case MethodLoadKind::kRuntimeCall:
+ GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+ return; // No code pointer retrieval; the runtime performs the call directly.
+ case MethodLoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ // Do not materialize the method pointer, load directly the entrypoint.
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
+ EmitPcRelativeAuipcPlaceholder(info_high, RA);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), info_high);
+ EmitPcRelativeLdPlaceholder(info_low, RA, RA);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ default:
+ LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
+ break;
+ }
+
+ switch (invoke->GetCodePtrLocation()) {
+ case CodePtrLocation::kCallSelf:
+ DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
+ __ Jal(&frame_entry_label_);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ break;
+ case CodePtrLocation::kCallArtMethod:
+ // RA = callee_method->entry_point_from_quick_compiled_code_;
+ __ Loadd(RA,
+ callee_method.AsRegister<XRegister>(),
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize).Int32Value());
+ // RA();
+ __ Jalr(RA);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ break;
+ case CodePtrLocation::kCallCriticalNative: {
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorRiscv64,
+ kNativeStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke);
+ if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+ // Entrypoint is already loaded in RA.
+ } else {
+ // RA = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni
+ MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kRiscv64PointerSize);
+ __ Loadd(RA, callee_method.AsRegister<XRegister>(), offset.Int32Value());
+ }
+ __ Jalr(RA);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ // The result is returned the same way in native ABI and managed ABI. No result conversion is
+ // needed; see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`.
+ if (out_frame_size != 0u) {
+ DecreaseFrame(out_frame_size);
+ }
+ break;
+ }
+ }
+
+ DCHECK(!IsLeafMethod());
+}
+
+void CodeGeneratorRISCV64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
+ XRegister klass) {
+ // We know the destination of an intrinsic, so no need to record inline caches.
+ if (!instruction->GetLocations()->Intrinsified() &&
+ GetGraph()->IsCompilingBaseline() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ Riscv64Label done;
+ // The `art_quick_update_inline_cache` entrypoint expects the inline cache in T5.
+ XRegister ic_reg = T5;
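+ // T5 is one of the scratch registers (TMP2), so exclude it from the scratch scope
+ // to keep the inline cache pointer live across the loads below.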
+ ScratchRegisterScope srs(GetAssembler());
+ DCHECK_EQ(srs.AvailableXRegisters(), 2u);
+ srs.ExcludeXRegister(ic_reg);
+ DCHECK_EQ(srs.AvailableXRegisters(), 1u);
+ __ LoadConst64(ic_reg, address);
+ {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister tmp = srs2.AllocateXRegister();
+ __ Loadd(tmp, ic_reg, InlineCache::ClassesOffset().Int32Value());
+ // Fast path for a monomorphic cache.
+ __ Beq(klass, tmp, &done);
+ }
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateVirtualCall(HInvokeVirtual* invoke,
+ Location temp_location,
+ SlowPathCode* slow_path) {
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ XRegister receiver = calling_convention.GetRegisterAt(0);
+ XRegister temp = temp_location.AsRegister<XRegister>();
+ MemberOffset method_offset =
+ mirror::Class::EmbeddedVTableEntryOffset(invoke->GetVTableIndex(), kRiscv64PointerSize);
+ MemberOffset class_offset = mirror::Object::ClassOffset();
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize);
+
+ // temp = object->GetClass();
+ __ Loadwu(temp, receiver, class_offset.Int32Value());
+ MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However, this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (though the
+ // collector may not do so in the future).
+ MaybeUnpoisonHeapReference(temp);
+
+ // If we're compiling baseline, update the inline cache.
+ MaybeGenerateInlineCacheCheck(invoke, temp);
+
+ // temp = temp->GetMethodAt(method_offset);
+ __ Loadd(temp, temp, method_offset.Int32Value());
+ // RA = temp->GetEntryPoint();
+ __ Loadd(RA, temp, entry_point.Int32Value());
+ // RA();
+ __ Jalr(RA);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+}
+
+void CodeGeneratorRISCV64::MoveFromReturnRegister(Location trg, DataType::Type type) {
+ if (!trg.IsValid()) {
+ DCHECK_EQ(type, DataType::Type::kVoid);
+ return;
+ }
+
+ DCHECK_NE(type, DataType::Type::kVoid);
+
+ if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
+ XRegister trg_reg = trg.AsRegister<XRegister>();
+ XRegister res_reg = Riscv64ReturnLocation(type).AsRegister<XRegister>();
+ if (trg_reg != res_reg) {
+ __ Mv(trg_reg, res_reg);
+ }
+ } else {
+ FRegister trg_reg = trg.AsFpuRegister<FRegister>();
+ FRegister res_reg = Riscv64ReturnLocation(type).AsFpuRegister<FRegister>();
+ if (trg_reg != res_reg) {
+ __ FMvD(trg_reg, res_reg); // 64-bit move is OK also for `float`.
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::PoisonHeapReference(XRegister reg) {
+ __ Sub(reg, Zero, reg); // Negate the ref.
+ __ ZextW(reg, reg); // Zero-extend the 32-bit ref.
+}
+
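+// Unpoisoning uses the same sequence as poisoning: negation modulo 2^32 is its own inverse.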
+void CodeGeneratorRISCV64::UnpoisonHeapReference(XRegister reg) {
+ __ Sub(reg, Zero, reg); // Negate the ref.
+ __ ZextW(reg, reg); // Zero-extend the 32-bit ref.
+}
+
+inline void CodeGeneratorRISCV64::MaybePoisonHeapReference(XRegister reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+}
+
+inline void CodeGeneratorRISCV64::MaybeUnpoisonHeapReference(XRegister reg) {
+ if (kPoisonHeapReferences) {
+ UnpoisonHeapReference(reg);
+ }
+}
+
+void CodeGeneratorRISCV64::SwapLocations(Location loc1, Location loc2, DataType::Type type) {
+ DCHECK(!loc1.IsConstant());
+ DCHECK(!loc2.IsConstant());
+
+ if (loc1.Equals(loc2)) {
+ return;
+ }
+
+ bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
+ bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
+ bool is_simd1 = loc1.IsSIMDStackSlot();
+ bool is_simd2 = loc2.IsSIMDStackSlot();
+ bool is_fp_reg1 = loc1.IsFpuRegister();
+ bool is_fp_reg2 = loc2.IsFpuRegister();
+
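+ // Swap through a temporary when exactly one location is a stack slot, or when both
+ // locations are core registers or both are FP registers.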
+ if ((is_slot1 != is_slot2) ||
+ (loc2.IsRegister() && loc1.IsRegister()) ||
+ (is_fp_reg2 && is_fp_reg1)) {
+ if ((is_fp_reg2 && is_fp_reg1) && GetGraph()->HasSIMD()) {
+ LOG(FATAL) << "Unsupported";
+ UNREACHABLE();
+ }
+ ScratchRegisterScope srs(GetAssembler());
+ Location tmp = (is_fp_reg2 || is_fp_reg1)
+ ? Location::FpuRegisterLocation(srs.AllocateFRegister())
+ : Location::RegisterLocation(srs.AllocateXRegister());
+ MoveLocation(tmp, loc1, type);
+ MoveLocation(loc1, loc2, type);
+ MoveLocation(loc2, tmp, type);
+ } else if (is_slot1 && is_slot2) {
+ move_resolver_.Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), loc1.IsDoubleStackSlot());
+ } else if (is_simd1 && is_simd2) {
+ // TODO(riscv64): Add VECTOR/SIMD later.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) {
+ // TODO(riscv64): Add VECTOR/SIMD later.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ } else {
+ LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2;
+ }
+}
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index 405b39aa0a..375cec957f 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -17,7 +17,888 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
+#include "android-base/logging.h"
+#include "arch/riscv64/registers_riscv64.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
+#include "intrinsics_list.h"
+#include "optimizing/locations.h"
+#include "parallel_move_resolver.h"
+#include "utils/riscv64/assembler_riscv64.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+// InvokeDexCallingConvention registers
+static constexpr XRegister kParameterCoreRegisters[] = {A1, A2, A3, A4, A5, A6, A7};
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+static constexpr FRegister kParameterFpuRegisters[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
+static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+
+// InvokeRuntimeCallingConvention registers
+static constexpr XRegister kRuntimeParameterCoreRegisters[] = {A0, A1, A2, A3, A4, A5, A6, A7};
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+ arraysize(kRuntimeParameterCoreRegisters);
+
+static constexpr FRegister kRuntimeParameterFpuRegisters[] = {
+ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7
+};
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+ arraysize(kRuntimeParameterFpuRegisters);
+
+#define UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(V) \
+ V(IntegerReverse) \
+ V(IntegerDivideUnsigned) \
+ V(LongReverse) \
+ V(LongDivideUnsigned) \
+ V(MathFmaDouble) \
+ V(MathFmaFloat) \
+ V(MathCos) \
+ V(MathSin) \
+ V(MathAcos) \
+ V(MathAsin) \
+ V(MathAtan) \
+ V(MathAtan2) \
+ V(MathPow) \
+ V(MathCbrt) \
+ V(MathCosh) \
+ V(MathExp) \
+ V(MathExpm1) \
+ V(MathHypot) \
+ V(MathLog) \
+ V(MathLog10) \
+ V(MathNextAfter) \
+ V(MathSinh) \
+ V(MathTan) \
+ V(MathTanh) \
+ V(MathSqrt) \
+ V(MathCeil) \
+ V(MathFloor) \
+ V(MathRint) \
+ V(MathRoundDouble) \
+ V(MathRoundFloat) \
+ V(MathMultiplyHigh) \
+ V(SystemArrayCopyByte) \
+ V(SystemArrayCopyChar) \
+ V(SystemArrayCopyInt) \
+ V(SystemArrayCopy) \
+ V(ThreadCurrentThread) \
+ V(FP16Ceil) \
+ V(FP16Compare) \
+ V(FP16Floor) \
+ V(FP16Rint) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(StringCompareTo) \
+ V(StringEquals) \
+ V(StringGetCharsNoCheck) \
+ V(StringIndexOf) \
+ V(StringIndexOfAfter) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringNewStringFromBytes) \
+ V(StringNewStringFromChars) \
+ V(StringNewStringFromString) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ V(UnsafeCASInt) \
+ V(UnsafeCASLong) \
+ V(UnsafeCASObject) \
+ V(UnsafeGet) \
+ V(UnsafeGetVolatile) \
+ V(UnsafeGetObject) \
+ V(UnsafeGetObjectVolatile) \
+ V(UnsafeGetLong) \
+ V(UnsafeGetLongVolatile) \
+ V(UnsafePut) \
+ V(UnsafePutOrdered) \
+ V(UnsafePutVolatile) \
+ V(UnsafePutObject) \
+ V(UnsafePutObjectOrdered) \
+ V(UnsafePutObjectVolatile) \
+ V(UnsafePutLong) \
+ V(UnsafePutLongOrdered) \
+ V(UnsafePutLongVolatile) \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(JdkUnsafeCASInt) \
+ V(JdkUnsafeCASLong) \
+ V(JdkUnsafeCASObject) \
+ V(JdkUnsafeCompareAndSetInt) \
+ V(JdkUnsafeCompareAndSetLong) \
+ V(JdkUnsafeCompareAndSetObject) \
+ V(JdkUnsafeGet) \
+ V(JdkUnsafeGetVolatile) \
+ V(JdkUnsafeGetAcquire) \
+ V(JdkUnsafeGetObject) \
+ V(JdkUnsafeGetObjectVolatile) \
+ V(JdkUnsafeGetObjectAcquire) \
+ V(JdkUnsafeGetLong) \
+ V(JdkUnsafeGetLongVolatile) \
+ V(JdkUnsafeGetLongAcquire) \
+ V(JdkUnsafePut) \
+ V(JdkUnsafePutOrdered) \
+ V(JdkUnsafePutRelease) \
+ V(JdkUnsafePutVolatile) \
+ V(JdkUnsafePutObject) \
+ V(JdkUnsafePutObjectOrdered) \
+ V(JdkUnsafePutObjectVolatile) \
+ V(JdkUnsafePutObjectRelease) \
+ V(JdkUnsafePutLong) \
+ V(JdkUnsafePutLongOrdered) \
+ V(JdkUnsafePutLongVolatile) \
+ V(JdkUnsafePutLongRelease) \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject) \
+ V(ReferenceGetReferent) \
+ V(ReferenceRefersTo) \
+ V(IntegerValueOf) \
+ V(ThreadInterrupted) \
+ V(ReachabilityFence) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ V(VarHandleCompareAndExchange) \
+ V(VarHandleCompareAndExchangeAcquire) \
+ V(VarHandleCompareAndExchangeRelease) \
+ V(VarHandleCompareAndSet) \
+ V(VarHandleGet) \
+ V(VarHandleGetAcquire) \
+ V(VarHandleGetAndAdd) \
+ V(VarHandleGetAndAddAcquire) \
+ V(VarHandleGetAndAddRelease) \
+ V(VarHandleGetAndBitwiseAnd) \
+ V(VarHandleGetAndBitwiseAndAcquire) \
+ V(VarHandleGetAndBitwiseAndRelease) \
+ V(VarHandleGetAndBitwiseOr) \
+ V(VarHandleGetAndBitwiseOrAcquire) \
+ V(VarHandleGetAndBitwiseOrRelease) \
+ V(VarHandleGetAndBitwiseXor) \
+ V(VarHandleGetAndBitwiseXorAcquire) \
+ V(VarHandleGetAndBitwiseXorRelease) \
+ V(VarHandleGetAndSet) \
+ V(VarHandleGetAndSetAcquire) \
+ V(VarHandleGetAndSetRelease) \
+ V(VarHandleGetOpaque) \
+ V(VarHandleGetVolatile) \
+ V(VarHandleSet) \
+ V(VarHandleSetOpaque) \
+ V(VarHandleSetRelease) \
+ V(VarHandleSetVolatile) \
+ V(VarHandleWeakCompareAndSet) \
+ V(VarHandleWeakCompareAndSetAcquire) \
+ V(VarHandleWeakCompareAndSetPlain) \
+ V(VarHandleWeakCompareAndSetRelease)
+
+// Method register on invoke.
+static const XRegister kArtMethodRegister = A0;
+
+class CodeGeneratorRISCV64;
+
+class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> {
+ public:
+ InvokeRuntimeCallingConvention()
+ : CallingConvention(kRuntimeParameterCoreRegisters,
+ kRuntimeParameterCoreRegistersLength,
+ kRuntimeParameterFpuRegisters,
+ kRuntimeParameterFpuRegistersLength,
+ kRiscv64PointerSize) {}
+
+ Location GetReturnLocation(DataType::Type return_type);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+class InvokeDexCallingConvention : public CallingConvention<XRegister, FRegister> {
+ public:
+ InvokeDexCallingConvention()
+ : CallingConvention(kParameterCoreRegisters,
+ kParameterCoreRegistersLength,
+ kParameterFpuRegisters,
+ kParameterFpuRegistersLength,
+ kRiscv64PointerSize) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitorRISCV64 : public InvokeDexCallingConventionVisitor {
+ public:
+ InvokeDexCallingConventionVisitorRISCV64() {}
+ virtual ~InvokeDexCallingConventionVisitorRISCV64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ private:
+ InvokeDexCallingConvention calling_convention;
+
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorRISCV64);
+};
+
+class CriticalNativeCallingConventionVisitorRiscv64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorRiscv64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorRiscv64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorRiscv64);
+};
+
+class SlowPathCodeRISCV64 : public SlowPathCode {
+ public:
+ explicit SlowPathCodeRISCV64(HInstruction* instruction)
+ : SlowPathCode(instruction), entry_label_(), exit_label_() {}
+
+ Riscv64Label* GetEntryLabel() { return &entry_label_; }
+ Riscv64Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Riscv64Label entry_label_;
+ Riscv64Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeRISCV64);
+};
+
+class ParallelMoveResolverRISCV64 : public ParallelMoveResolverWithSwap {
+ public:
+ ParallelMoveResolverRISCV64(ArenaAllocator* allocator, CodeGeneratorRISCV64* codegen)
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
+
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
+
+ void Exchange(int index1, int index2, bool double_slot);
+
+ Riscv64Assembler* GetAssembler() const;
+
+ private:
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverRISCV64);
+};
+
+class FieldAccessCallingConventionRISCV64 : public FieldAccessCallingConvention {
+ public:
+ FieldAccessCallingConventionRISCV64() {}
+
+ Location GetObjectLocation() const override {
+ return Location::RegisterLocation(A1);
+ }
+ Location GetFieldIndexLocation() const override {
+ return Location::RegisterLocation(A0);
+ }
+ Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ return Location::RegisterLocation(A0);
+ }
+ Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
+ bool is_instance) const override {
+ return is_instance
+ ? Location::RegisterLocation(A2)
+ : Location::RegisterLocation(A1);
+ }
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ return Location::FpuRegisterLocation(FA0);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionRISCV64);
+};
+
+class LocationsBuilderRISCV64 : public HGraphVisitor {
+ public:
+ LocationsBuilderRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen)
+ : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ void VisitInstruction(HInstruction* instruction) override {
+ LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id "
+ << instruction->GetId() << ")";
+ }
+
+ protected:
+ void HandleInvoke(HInvoke* invoke);
+ void HandleBinaryOp(HBinaryOperation* operation);
+ void HandleCondition(HCondition* instruction);
+ void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction);
+ void HandleFieldGet(HInstruction* instruction);
+
+ InvokeDexCallingConventionVisitorRISCV64 parameter_visitor_;
+
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(LocationsBuilderRISCV64);
+};
+
+class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
+ public:
+ InstructionCodeGeneratorRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ void VisitInstruction(HInstruction* instruction) override {
+ LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id "
+ << instruction->GetId() << ")";
+ }
+
+ Riscv64Assembler* GetAssembler() const { return assembler_; }
+
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
+ void ShNAdd(XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type);
+
+ protected:
+ void GenerateClassInitializationCheck(SlowPathCodeRISCV64* slow_path, XRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, XRegister temp);
+ void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+ void HandleBinaryOp(HBinaryOperation* operation);
+ void HandleCondition(HCondition* instruction);
+ void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ XRegister obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option,
+ Riscv64Label* label_low = nullptr);
+
+ void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
+ Riscv64Label* true_target,
+ Riscv64Label* false_target);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+ void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void GenerateIntLongCondition(IfCondition cond, LocationSummary* locations);
+ void GenerateIntLongCompareAndBranch(IfCondition cond,
+ LocationSummary* locations,
+ Riscv64Label* label);
+ void GenerateFpCondition(IfCondition cond,
+ bool gt_bias,
+ DataType::Type type,
+ LocationSummary* locations,
+ Riscv64Label* label = nullptr);
+ void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ void GenPackedSwitchWithCompares(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block);
+ void GenTableBasedPackedSwitch(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block);
+ int32_t VecAddress(LocationSummary* locations,
+ size_t size,
+ /*out*/ XRegister* adjusted_base);
+ void GenConditionalMove(HSelect* select);
+
+ template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
+ void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+
+ template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister)>
+ void FpUnOp(Reg rd, FRegister rs1, DataType::Type type);
+ void FAbs(FRegister rd, FRegister rs1, DataType::Type type);
+ void FNeg(FRegister rd, FRegister rs1, DataType::Type type);
+ void FMv(FRegister rd, FRegister rs1, DataType::Type type);
+ void FClass(XRegister rd, FRegister rs1, DataType::Type type);
+
+ void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type);
+ void Store(Location value, XRegister rs1, int32_t offset, DataType::Type type);
+
+ Riscv64Assembler* const assembler_;
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorRISCV64);
+};
+
+class CodeGeneratorRISCV64 : public CodeGenerator {
+ public:
+ CodeGeneratorRISCV64(HGraph* graph,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats = nullptr);
+ virtual ~CodeGeneratorRISCV64() {}
+
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
+
+ void Bind(HBasicBlock* block) override;
+
+ size_t GetWordSize() const override {
+ // The "word" for the compiler is the core register size (64-bit for riscv64) while the
+ // riscv64 assembler uses "word" for 32-bit values and "double word" for 64-bit values.
+ return kRiscv64DoublewordSize;
+ }
+
+ bool SupportsPredicatedSIMD() const override {
+ // TODO(riscv64): Check the vector extension.
+ return false;
+ }
+
+ // Get FP register width in bytes for spilling/restoring in the slow paths.
+ //
+ // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
+ // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
+ size_t GetSlowPathFPWidth() const override {
+ // Default implementation.
+ return GetCalleePreservedFPWidth();
+ }
+
+ size_t GetCalleePreservedFPWidth() const override {
+ return kRiscv64FloatRegSizeInBytes;
+ };
+
+ size_t GetSIMDRegisterWidth() const override {
+ // TODO(riscv64): Implement SIMD with the Vector extension.
+ // Note: HLoopOptimization calls this function even for an ISA without SIMD support.
+ return kRiscv64FloatRegSizeInBytes;
+ };
+
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
+ return assembler_.GetLabelLocation(GetLabelOf(block));
+ };
+
+ Riscv64Label* GetLabelOf(HBasicBlock* block) const {
+ return CommonGetLabelOf<Riscv64Label>(block_labels_, block);
+ }
+
+ void Initialize() override { block_labels_ = CommonInitializeLabels<Riscv64Label>(); }
+
+ void MoveConstant(Location destination, int32_t value) override;
+ void MoveLocation(Location destination, Location source, DataType::Type dst_type) override;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
+
+ Riscv64Assembler* GetAssembler() override { return &assembler_; }
+ const Riscv64Assembler& GetAssembler() const override { return assembler_; }
+
+ HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
+ HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
+
+ void MaybeGenerateInlineCacheCheck(HInstruction* instruction, XRegister klass);
+
+ void SetupBlockedRegisters() const override;
+
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
+
+ InstructionSet GetInstructionSet() const override { return InstructionSet::kRiscv64; }
+
+ uint32_t GetPreferredSlotsAlignment() const override {
+ return static_cast<uint32_t>(kRiscv64PointerSize);
+ }
+
+ void Finalize() override;
+
+ // Generate code to invoke a runtime entry point.
+ void InvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path = nullptr) override;
+
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
+ ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; }
+
+ bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
+
+ void IncreaseFrame(size_t adjustment) override;
+ void DecreaseFrame(size_t adjustment) override;
+
+ void GenerateNop() override;
+
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
+
+ // Check if the desired_string_load_kind is supported. If it is, return it,
+ // otherwise return a fall-back kind that should be used instead.
+ HLoadString::LoadKind GetSupportedLoadStringKind(
+ HLoadString::LoadKind desired_string_load_kind) override;
+
+ // Check if the desired_class_load_kind is supported. If it is, return it,
+ // otherwise return a fall-back kind that should be used instead.
+ HLoadClass::LoadKind GetSupportedLoadClassKind(
+ HLoadClass::LoadKind desired_class_load_kind) override;
+
+ // Check if the desired_dispatch_info is supported. If it is, return it,
+ // otherwise return a fall-back info that should be used instead.
+ HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) override;
+
+ // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
+ // whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
+ //
+ // The 20-bit and 12-bit parts of the 32-bit PC-relative offset are patched separately,
+ // necessitating two patches/infos. There can be more than two patches/infos if the
+ // instruction supplying the high part is shared with e.g. a slow path, while the low
+ // part is supplied by separate instructions, e.g.:
+ // auipc r1, high // patch
+ // lwu r2, low(r1) // patch
+ // beqz r2, slow_path
+ // back:
+ // ...
+ // slow_path:
+ // ...
+ // sw r2, low(r1) // patch
+ // j back
+ struct PcRelativePatchInfo : PatchInfo<Riscv64Label> {
+ PcRelativePatchInfo(const DexFile* dex_file,
+ uint32_t off_or_idx,
+ const PcRelativePatchInfo* info_high)
+ : PatchInfo<Riscv64Label>(dex_file, off_or_idx),
+ pc_insn_label(info_high != nullptr ? &info_high->label : &label) {
+ DCHECK_IMPLIES(info_high != nullptr, info_high->pc_insn_label == &info_high->label);
+ }
+
+ // Pointer to the info for the high part patch or nullptr if this is the high part patch info.
+ const Riscv64Label* pc_insn_label;
+
+ private:
+ PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete;
+ DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
+ };
+
+ PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageJniEntrypointPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr);
+
+ PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewTypeBssEntryPatch(HLoadClass* load_class,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high = nullptr);
+
+ void EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, XRegister out);
+ void EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1);
+ void EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1);
+ void EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1);
+
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
+
+ Literal* DeduplicateBootImageAddressLiteral(uint64_t address);
+ void PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const Literal* literal,
+ uint64_t index_in_table) const;
+ Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle);
+ Literal* DeduplicateJitClassLiteral(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle);
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
+
+ void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp,
+ SlowPathCode* slow_path = nullptr) override;
+ void GenerateVirtualCall(HInvokeVirtual* invoke,
+ Location temp,
+ SlowPathCode* slow_path = nullptr) override;
+ void MoveFromReturnRegister(Location trg, DataType::Type type) override;
+
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
+ void MaybeIncrementHotness(bool is_frame_entry);
+
+ bool CanUseImplicitSuspendCheck() const;
+
+
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
+ void MarkGCCard(XRegister object, XRegister value, bool value_can_be_null);
+
+ //
+ // Heap poisoning.
+ //
+
+ // Poison a heap reference contained in `reg`.
+ void PoisonHeapReference(XRegister reg);
+
+ // Unpoison a heap reference contained in `reg`.
+ void UnpoisonHeapReference(XRegister reg);
+
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(XRegister reg);
+
+ // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybeUnpoisonHeapReference(XRegister reg);
+
+ void SwapLocations(Location loc1, Location loc2, DataType::Type type);
+
+ private:
+ using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
+ using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>;
+ using StringToLiteralMap =
+ ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>;
+ using TypeToLiteralMap = ArenaSafeMap<TypeReference, Literal*, TypeReferenceValueComparator>;
+
+ Literal* DeduplicateUint32Literal(uint32_t value);
+ Literal* DeduplicateUint64Literal(uint64_t value);
+
+ PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file,
+ uint32_t offset_or_index,
+ const PcRelativePatchInfo* info_high,
+ ArenaDeque<PcRelativePatchInfo>* patches);
+
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<linker::LinkerPatch>* linker_patches);
+
+ Riscv64Assembler assembler_;
+ LocationsBuilderRISCV64 location_builder_;
+ InstructionCodeGeneratorRISCV64 instruction_visitor_;
+ Riscv64Label frame_entry_label_;
+
+ // Labels for each block that will be compiled.
+ Riscv64Label* block_labels_; // Indexed by block id.
+
+ ParallelMoveResolverRISCV64 move_resolver_;
+
+ // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+ Uint32ToLiteralMap uint32_literals_;
+ // Deduplication map for 64-bit literals, used for non-patchable method address or method code
+ // address.
+ Uint64ToLiteralMap uint64_literals_;
+
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
+ // PC-relative method patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
+ // PC-relative type patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
+ // PC-relative type patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative public type patch info for kBssEntryPublic.
+ ArenaDeque<PcRelativePatchInfo> public_type_bss_entry_patches_;
+ // PC-relative package type patch info for kBssEntryPackage.
+ ArenaDeque<PcRelativePatchInfo> package_type_bss_entry_patches_;
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
+ // PC-relative String patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_jni_entrypoint_patches_;
+ // PC-relative patch info for IntrinsicObjects for the boot image,
+ // and for method/type/string patches for kBootImageRelRo otherwise.
+ ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_;
+
+ // Patches for string root accesses in JIT compiled code.
+ StringToLiteralMap jit_string_patches_;
+ // Patches for class root accesses in JIT compiled code.
+ TypeToLiteralMap jit_class_patches_;
+};
+
+} // namespace riscv64
+} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
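
(For reference, a minimal sketch of how the paired high/low patch infos declared in this header are meant to be used together. This is illustrative only and assumes the ART types above; the boot-image method case and the output register are arbitrary placeholders, not part of this change.)

  // Illustrative sketch. The high-part info owns the label of the AUIPC; the low-part
  // info points back at it, so both linker patches resolve against the same AUIPC.
  void EmitBootImageMethodAddress(CodeGeneratorRISCV64* codegen,
                                  MethodReference target_method,
                                  XRegister reg) {
    CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
        codegen->NewBootImageMethodPatch(target_method);
    CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
        codegen->NewBootImageMethodPatch(target_method, info_high);
    codegen->EmitPcRelativeAuipcPlaceholder(info_high, reg);     // auipc reg, <high> (patched)
    codegen->EmitPcRelativeAddiPlaceholder(info_low, reg, reg);  // addi  reg, reg, <low> (patched)
  }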
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index 6b6e25cf0c..848b5e7567 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -61,10 +61,8 @@ inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction*
// - constant location - if 'constant' is an actual constant and its value can be
// encoded into the instruction.
// - register location otherwise.
-inline Location NEONEncodableConstantOrRegister(HInstruction* constant,
- HInstruction* instr) {
- if (constant->IsConstant()
- && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
+ if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant);
}
@@ -1533,12 +1531,32 @@ void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruc
UNREACHABLE();
}
-void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index fe15791d3f..ef79932899 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -62,8 +62,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i
// encoded into the instruction.
// - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
- if (constant->IsConstant()
- && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+ if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant);
}
@@ -246,7 +245,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const VRegister dst = DRegisterFrom(locations->Out());
- const PRegister p_reg = LoopPReg();
+ const PRegister p_reg = GetVecGoverningPReg(instruction);
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
@@ -284,7 +283,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
DataType::Type from = instruction->GetInputType();
DataType::Type to = instruction->GetResultType();
ValidateVectorLength(instruction);
@@ -304,7 +303,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -342,7 +341,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt8:
@@ -378,7 +377,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool: // special case boolean-not
@@ -438,7 +437,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -497,7 +496,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -546,7 +545,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -585,7 +584,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
// Note: VIXL guarantees StrictNaNPropagation for Fdiv.
@@ -633,7 +632,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -678,7 +677,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -714,7 +713,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -769,7 +768,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -802,7 +801,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -835,7 +834,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -966,7 +965,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
const ZRegister acc = ZRegisterFrom(locations->InAt(0));
const ZRegister left = ZRegisterFrom(locations->InAt(1));
const ZRegister right = ZRegisterFrom(locations->InAt(2));
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
DCHECK(locations->InAt(0).Equals(locations->Out()));
ValidateVectorLength(instruction);
@@ -1029,7 +1028,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction)
const ZRegister acc = ZRegisterFrom(locations->InAt(0));
const ZRegister left = ZRegisterFrom(locations->InAt(1));
const ZRegister right = ZRegisterFrom(locations->InAt(2));
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
@@ -1099,7 +1098,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
const ZRegister reg = ZRegisterFrom(locations->Out());
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
- const PRegisterZ p_reg = LoopPReg().Zeroing();
+ const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -1141,7 +1140,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
const ZRegister reg = ZRegisterFrom(locations->InAt(2));
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
- const PRegisterZ p_reg = LoopPReg().Zeroing();
+ const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -1182,25 +1181,25 @@ void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
- const PRegister p_reg = LoopPReg();
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
- __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
- __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1208,6 +1207,67 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instru
}
}
+void LocationsBuilderARM64Sve::VisitVecCondition(HVecCondition* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecCondition(HVecCondition* instruction) {
+ DCHECK(instruction->IsPredicated());
+ LocationSummary* locations = instruction->GetLocations();
+ const ZRegister left = ZRegisterFrom(locations->InAt(0));
+ const ZRegister right = ZRegisterFrom(locations->InAt(1));
+ const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
+
+ HVecOperation* a = instruction->InputAt(0)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(1)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ ValidateVectorLength(instruction);
+
+ // TODO: Support other condition OPs and types.
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ __ Cmpeq(output_p_reg.VnB(), p_reg, left.VnB(), right.VnB());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ __ Cmpeq(output_p_reg.VnH(), p_reg, left.VnH(), right.VnH());
+ break;
+ case DataType::Type::kInt32:
+ __ Cmpeq(output_p_reg.VnS(), p_reg, left.VnS(), right.VnS());
+ break;
+ case DataType::Type::kInt64:
+ __ Cmpeq(output_p_reg.VnD(), p_reg, left.VnD(), right.VnD());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DCHECK(instruction->InputAt(0)->IsVecPredSetOperation());
+ locations->SetInAt(0, Location::NoLocation());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
+ DCHECK(instruction->IsPredicated());
+
+ const PRegister input_p_reg = GetVecPredSetFixedOutPReg(
+ instruction->InputAt(0)->AsVecPredSetOperation());
+ const PRegister control_p_reg = GetVecGoverningPReg(instruction);
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
+
+ __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB());
+}
+
void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1218,8 +1278,8 @@ void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
// Semantically, the out location of this instruction and predicate inputs locations of
// its users should be a fixed predicate register (similar to
// Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
- // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it
- // to the RA.
+ // SIMD regs (e.g. predicate), so fixed registers are used explicitly without exposing them
+ // to the RA (through GetVecPredSetFixedOutPReg()).
//
// To make the RA happy Location::NoLocation() was used for all the vector instructions
// predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
@@ -1241,21 +1301,22 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct
DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
Register left = InputRegisterAt(instruction, 0);
Register right = InputRegisterAt(instruction, 1);
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);
switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
case 1u:
- __ Whilelo(LoopPReg().VnB(), left, right);
+ __ Whilelo(output_p_reg.VnB(), left, right);
break;
case 2u:
- __ Whilelo(LoopPReg().VnH(), left, right);
+ __ Whilelo(output_p_reg.VnH(), left, right);
break;
case 4u:
- __ Whilelo(LoopPReg().VnS(), left, right);
+ __ Whilelo(output_p_reg.VnS(), left, right);
break;
case 8u:
- __ Whilelo(LoopPReg().VnD(), left, right);
+ __ Whilelo(output_p_reg.VnD(), left, right);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1263,20 +1324,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct
}
}
-void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::NoLocation());
// Result of the operation - a boolean value in a core register.
locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
Register reg = OutputRegister(instruction);
- // Currently VecPredCondition is only used as part of vectorized loop check condition
+ // Currently VecPredToBoolean is only used as part of vectorized loop check condition
// evaluation.
- DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
+ DCHECK(instruction->GetPCondKind() == HVecPredToBoolean::PCondKind::kNFirst);
__ Cset(reg, pl);
}
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index e8ecf28386..70f22af17b 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -1069,12 +1069,32 @@ void InstructionCodeGeneratorARMVIXL::VisitVecPredWhile(HVecPredWhile* instructi
UNREACHABLE();
}
-void LocationsBuilderARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 343a6e1af4..1f9b2578ac 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1401,12 +1401,32 @@ void InstructionCodeGeneratorX86::VisitVecPredWhile(HVecPredWhile* instruction)
UNREACHABLE();
}
-void LocationsBuilderX86::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorX86::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index fb6e4e753f..47afa3b4a1 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1374,12 +1374,32 @@ void InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instructio
UNREACHABLE();
}
-void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cb1cecc45a..b8c8d9f73d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -27,6 +27,7 @@
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86.h"
#include "jit/profiling_info.h"
@@ -38,6 +39,7 @@
#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
+#include "trace.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
@@ -839,7 +841,8 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
+ (instruction_->AsInvoke()->GetIntrinsic() ==
+ Intrinsics::kJdkUnsafeGetObjectVolatile) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
@@ -1107,6 +1110,7 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -1121,15 +1125,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -1140,8 +1142,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
kNumberOfCpuRegisters,
kNumberOfXmmRegisters,
kNumberOfRegisterPairs,
- ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
- arraysize(kCoreCalleeSaves))
+ ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
| (1 << kFakeReturnRegister),
0,
compiler_options,
@@ -1221,12 +1222,18 @@ void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
LocationSummary* locations = new (GetGraph()->GetAllocator())
LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
SetInForReturnValue(method_hook, locations);
+ // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ // An additional temporary register to hold the address at which to store the timestamp counter.
+ locations->AddTemp(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
SlowPathCode* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
+ LocationSummary* locations = instruction->GetLocations();
if (instruction->IsMethodExitHook()) {
// Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
@@ -1242,8 +1249,51 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst
MemberOffset offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
- __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ cmpb(Address::Absolute(address + offset.Int32Value()),
+ Immediate(instrumentation::Instrumentation::kFastTraceListeners));
+ // Check if there are any trace method entry / exit listeners. If there are none, skip the hooks.
+ __ j(kLess, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ __ j(kGreater, slow_path->GetEntryLabel());
+
+ // For entry_addr use the first temp that isn't EAX or EDX. We need this after
+ // rdtsc which returns values in EAX + EDX.
+ Register entry_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register index = locations->GetTemp(1).AsRegister<Register>();
+
+ // Check if there is room in the buffer for a new entry; if not, take the slow path.
+ uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value();
+ uint64_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kX86PointerSize>().Int32Value();
+
+ __ fs()->movl(index, Address::Absolute(trace_buffer_index_offset));
+ __ subl(index, Immediate(kNumEntriesForWallClock));
+ __ j(kLess, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ fs()->movl(Address::Absolute(trace_buffer_index_offset), index);
+ // Calculate the entry address in the buffer.
+ // entry_addr = base_addr + sizeof(void*) * index
+ __ fs()->movl(entry_addr, Address::Absolute(trace_buffer_ptr));
+ __ leal(entry_addr, Address(entry_addr, index, TIMES_4, 0));
+
+ // Record method pointer and trace action.
+ Register method = index;
+ __ movl(method, Address(ESP, kCurrentMethodStackOffset));
+ // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
+ // so there is no need to set the bits since they are already 0.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ movl(Address(entry_addr, kMethodOffsetInBytes), method);
+ // Get the timestamp. rdtsc returns the timestamp in EAX + EDX.
+ __ rdtsc();
+ __ movl(Address(entry_addr, kTimestampOffsetInBytes), EAX);
+ __ movl(Address(entry_addr, kHighTimestampOffsetInBytes), EDX);
__ Bind(slow_path->GetExitLabel());
}
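
The fast path generated above reserves kNumEntriesForWallClock slots in the per-thread trace buffer, stores the ArtMethod pointer with the trace action encoded in its low bits, and stores the rdtsc result as two 32-bit halves. A rough C++ sketch of that bookkeeping, with invented types and constants standing in for the real buffer layout; the __rdtsc() intrinsic from <x86intrin.h> is a GCC/Clang assumption, not part of this change:

    #include <cstddef>
    #include <cstdint>
    #include <x86intrin.h>  // __rdtsc(); assumes GCC or Clang targeting x86.

    // Invented stand-ins for the per-thread trace buffer state.
    constexpr intptr_t kEntryWordsSketch = 3;      // method + low timestamp + high timestamp
    constexpr uintptr_t kMethodExitBitSketch = 1;  // low bit tags a method-exit record

    struct TraceStateSketch {
      uintptr_t* buffer;  // base address of the buffer
      intptr_t index;     // next free slot, in words, growing downwards
    };

    // Returns false when the buffer is full, mirroring the "take the slow path" branch above.
    inline bool RecordEntrySketch(TraceStateSketch* state, void* method, bool is_exit) {
      intptr_t new_index = state->index - kEntryWordsSketch;
      if (new_index < 0) {
        return false;  // no room left; the runtime would flush through its slow path here
      }
      state->index = new_index;
      uintptr_t* entry = state->buffer + new_index;
      uintptr_t tagged_method = reinterpret_cast<uintptr_t>(method);
      if (is_exit) {
        tagged_method |= kMethodExitBitSketch;  // method pointers are aligned, so the bit is free
      }
      uint64_t tsc = __rdtsc();
      entry[0] = tagged_method;
      entry[1] = static_cast<uintptr_t>(tsc & 0xffffffffu);  // low half (EAX)
      entry[2] = static_cast<uintptr_t>(tsc >> 32);          // high half (EDX)
      return true;
    }
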
@@ -1254,7 +1304,13 @@ void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instructi
}
void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
- new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ // An additional temporary register to hold the address at which the timestamp is stored.
+ locations->AddTemp(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
@@ -1865,8 +1921,7 @@ void LocationsBuilderX86::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {}
template<class LabelType>
void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
@@ -1981,7 +2036,7 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
Location rhs,
HInstruction* insn,
bool is_double) {
- HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
+ HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull();
if (is_double) {
if (rhs.IsFpuRegister()) {
__ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
@@ -2506,7 +2561,7 @@ void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -2516,7 +2571,7 @@ void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -2526,7 +2581,7 @@ void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -2536,7 +2591,7 @@ void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -2546,7 +2601,7 @@ void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -2555,7 +2610,7 @@ void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_f
}
void InstructionCodeGeneratorX86::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -2571,7 +2626,7 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
codegen_->GenerateFrameExit();
}
@@ -2954,10 +3009,10 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
constant_area));
__ xorps(out.AsFpuRegister<XmmRegister>(), mask);
} else {
- __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
- neg->GetBaseMethodAddress(),
- constant_area));
- __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
+ __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
+ neg->GetBaseMethodAddress(),
+ constant_area));
+ __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
}
}
@@ -5086,8 +5141,7 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
}
void InstructionCodeGeneratorX86::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
-}
+ [[maybe_unused]] HParameterValue* instruction) {}
void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
LocationSummary* locations =
@@ -5095,7 +5149,7 @@ void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
}
-void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) {
}
void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
@@ -5294,7 +5348,7 @@ void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -5323,8 +5377,8 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ [[maybe_unused]] ArtMethod* method) {
return desired_dispatch_info;
}
@@ -6749,7 +6803,7 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
}
}
-void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -7213,9 +7267,8 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
Register out = out_loc.AsRegister<Register>();
bool generate_null_check = false;
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption();
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
DCHECK(!cls->CanCallRuntime());
@@ -7445,7 +7498,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption());
// No need for memory fence, thanks to the x86 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
@@ -7465,14 +7518,13 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
InvokeRuntimeCallingConvention calling_convention;
DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
@@ -7498,7 +7550,7 @@ void LocationsBuilderX86::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) {
__ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
}
@@ -7840,7 +7892,6 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::Any());
}
- // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -8028,11 +8079,11 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
// Maybe poison the `cls` for direct comparison with memory.
@@ -8584,12 +8635,12 @@ void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
__ Bind(slow_path->GetExitLabel());
}
-void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
@@ -8782,13 +8833,15 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons
case DataType::Type::kFloat32:
__ movss(out.AsFpuRegister<XmmRegister>(),
codegen_->LiteralFloatAddress(
- value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
+ value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
break;
case DataType::Type::kFloat64:
__ movsd(out.AsFpuRegister<XmmRegister>(),
codegen_->LiteralDoubleAddress(
- value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
+ value->AsDoubleConstant()->GetValue(),
+ insn->GetBaseMethodAddress(),
+ const_area));
break;
case DataType::Type::kInt32:
@@ -8877,7 +8930,7 @@ class JumpTableRIPFixup : public RIPFixup {
const HX86PackedSwitch* switch_instr_;
};
-void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorX86::Finalize() {
// Generate the constant area if needed.
X86Assembler* assembler = GetAssembler();
@@ -8897,7 +8950,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
}
// And finish up.
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
}
Address CodeGeneratorX86::LiteralDoubleAddress(double v,
@@ -8968,9 +9021,9 @@ Address CodeGeneratorX86::ArrayAddress(Register obj,
Location index,
ScaleFactor scale,
uint32_t data_offset) {
- return index.IsConstant() ?
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
- Address(obj, index.AsRegister<Register>(), scale, data_offset);
+ return index.IsConstant()
+ ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
+ : Address(obj, index.AsRegister<Register>(), scale, data_offset);
}
Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
@@ -9025,7 +9078,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
- dchecked_integral_cast<uint32_t>(address);
+ dchecked_integral_cast<uint32_t>(address);
}
void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
@@ -9042,13 +9095,13 @@ void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_da
}
}
-void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index d27155f31d..aa25528e08 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -196,7 +196,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
? Location::RegisterLocation(EDX)
: Location::RegisterLocation(ECX));
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
return Location::FpuRegisterLocation(XMM0);
}
@@ -635,7 +635,7 @@ class CodeGeneratorX86 : public CodeGenerator {
Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index eea6b204fa..f61a1f04c3 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -27,6 +27,7 @@
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
@@ -39,6 +40,7 @@
#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
+#include "trace.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
@@ -856,7 +858,8 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
+ (instruction_->AsInvoke()->GetIntrinsic() ==
+ Intrinsics::kJdkUnsafeGetObjectVolatile) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
@@ -1070,8 +1073,8 @@ void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* location
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ [[maybe_unused]] ArtMethod* method) {
return desired_dispatch_info;
}
@@ -1495,6 +1498,7 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -1509,15 +1513,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -1531,11 +1533,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
kNumberOfCpuRegisterPairs,
- ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
- arraysize(kCoreCalleeSaves))
+ ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
| (1 << kFakeReturnRegister),
- ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
- arraysize(kFpuCalleeSaves)),
+ ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
compiler_options,
stats,
ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
@@ -1585,12 +1585,18 @@ static dwarf::Reg DWARFReg(FloatRegister reg) {
}
void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
- new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ // We use rdtsc to record the timestamp for method profiling. rdtsc returns
+ // two 32-bit values in EAX + EDX even on 64-bit architectures.
+ locations->AddTemp(Location::RegisterLocation(RAX));
+ locations->AddTemp(Location::RegisterLocation(RDX));
}
void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
SlowPathCode* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
+ LocationSummary* locations = instruction->GetLocations();
codegen_->AddSlowPath(slow_path);
if (instruction->IsMethodExitHook()) {
@@ -1609,8 +1615,51 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i
instrumentation::Instrumentation::HaveMethodExitListenersOffset()
: instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
__ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
- __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ cmpb(Address(CpuRegister(TMP), 0),
+ Immediate(instrumentation::Instrumentation::kFastTraceListeners));
+ // Check if there are any method entry / exit listeners. If not, continue with execution.
+ __ j(kLess, slow_path->GetExitLabel());
+ // Check if there are any slow method entry / exit listeners. If so, take the slow path.
+ __ j(kGreater, slow_path->GetEntryLabel());
+
+ // Check if there is space in the buffer for a new entry; if not, take the slow path.
+ CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister entry_addr = CpuRegister(TMP);
+ uint64_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue();
+ __ gs()->movq(CpuRegister(index),
+ Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true));
+ __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock));
+ __ j(kLess, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true),
+ CpuRegister(index));
+ // Calculate the entry address in the buffer.
+ // entry_addr = base_addr + sizeof(void*) * index
+ __ gs()->movq(entry_addr,
+ Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
+ /* no_rip= */ true));
+ __ leaq(CpuRegister(entry_addr),
+ Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0));
+
+ // Record method pointer and action.
+ CpuRegister method = index;
+ __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+ // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
+ // so there is no need to set the bits since they are already 0.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method));
+ // Get the timestamp. rdtsc returns the timestamp in EAX + EDX even on 64-bit architectures.
+ __ rdtsc();
+ __ shlq(CpuRegister(RDX), Immediate(32));
+ __ orq(CpuRegister(RAX), CpuRegister(RDX));
+ __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX));
__ Bind(slow_path->GetExitLabel());
}
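
On x86-64 the generated code packs the two halves left by rdtsc in EDX (high) and EAX (low) into one 64-bit timestamp with the shlq/orq pair before storing it. The same packing as a tiny worked example in plain C++ (a sketch, not ART code):

    #include <cstdint>

    // Packs the 32-bit halves that RDTSC leaves in EDX (high) and EAX (low) into one
    // 64-bit timestamp, mirroring the `shlq $32, %rdx; orq %rdx, %rax` sequence above.
    constexpr uint64_t PackTimestampSketch(uint32_t eax, uint32_t edx) {
      return (static_cast<uint64_t>(edx) << 32) | eax;
    }

    static_assert(PackTimestampSketch(0x89abcdefu, 0x01234567u) == 0x0123456789abcdefull,
                  "the high half lands in the upper 32 bits");
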
@@ -1651,6 +1700,10 @@ void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
LocationSummary* locations = new (GetGraph()->GetAllocator())
LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
SetInForReturnValue(method_hook, locations);
+ // We use rdtsc to record the timestamp for method profiling. rdtsc returns
+ // two 32-bit values in EAX + EDX even on 64-bit architectures.
+ locations->AddTemp(Location::RegisterLocation(RAX));
+ locations->AddTemp(Location::RegisterLocation(RDX));
}
void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
@@ -1949,8 +2002,9 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
}
-void CodeGeneratorX86_64::MoveLocation(
- Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
+void CodeGeneratorX86_64::MoveLocation(Location dst,
+ Location src,
+ [[maybe_unused]] DataType::Type dst_type) {
Move(dst, src);
}
@@ -2009,8 +2063,7 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
template<class LabelType>
void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
@@ -2051,7 +2104,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition)
} else if (right.IsConstant()) {
__ ucomiss(left.AsFpuRegister<XmmRegister>(),
codegen_->LiteralFloatAddress(
- right.GetConstant()->AsFloatConstant()->GetValue()));
+ right.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(right.IsStackSlot());
__ ucomiss(left.AsFpuRegister<XmmRegister>(),
@@ -2065,7 +2118,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition)
} else if (right.IsConstant()) {
__ ucomisd(left.AsFpuRegister<XmmRegister>(),
codegen_->LiteralDoubleAddress(
- right.GetConstant()->AsDoubleConstant()->GetValue()));
+ right.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(right.IsDoubleStackSlot());
__ ucomisd(left.AsFpuRegister<XmmRegister>(),
@@ -2657,7 +2710,7 @@ void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -2667,7 +2720,7 @@ void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -2677,7 +2730,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -2687,7 +2740,7 @@ void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -2698,7 +2751,7 @@ void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
}
void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
- HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -2707,7 +2760,7 @@ void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructo
}
void InstructionCodeGeneratorX86_64::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -2723,7 +2776,7 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
codegen_->GenerateFrameExit();
}
@@ -4972,7 +5025,7 @@ void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HParameterValue* instruction) {
// Nothing to do, the parameter is already at its location.
}
@@ -4983,7 +5036,7 @@ void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
- HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HCurrentMethod* instruction) {
// Nothing to do, the method is already at its location.
}
@@ -5062,7 +5115,7 @@ void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unimplemented";
}
@@ -5930,8 +5983,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movsd(address, value.AsFpuRegister<XmmRegister>());
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
- int64_t v =
- bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+ int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
Address address_high =
CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
@@ -6084,7 +6136,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
}
}
-void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unimplemented";
}
@@ -6471,7 +6523,9 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
- if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
@@ -6507,9 +6561,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption();
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -6704,7 +6757,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
/* no_rip= */ false);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption());
// No need for memory fence, thanks to the x86-64 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
@@ -6725,14 +6778,13 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
// Custom calling convention: RAX serves as both input and output.
__ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
codegen_->InvokeRuntime(kQuickResolveString,
@@ -6760,7 +6812,7 @@ void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
__ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
}
@@ -7112,7 +7164,6 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::Any());
}
- // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -7301,11 +7352,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
// Maybe poison the `cls` for direct comparison with memory.
@@ -7830,12 +7881,12 @@ void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instructi
__ Bind(slow_path->GetExitLabel());
}
-void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
@@ -7930,13 +7981,13 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins
__ jmp(temp_reg);
}
-void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -8037,9 +8088,9 @@ Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
Location index,
ScaleFactor scale,
uint32_t data_offset) {
- return index.IsConstant() ?
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
- Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
+ return index.IsConstant()
+ ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
+ : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}
void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
@@ -8119,7 +8170,7 @@ class JumpTableRIPFixup : public RIPFixup {
const HPackedSwitch* switch_instr_;
};
-void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorX86_64::Finalize() {
// Generate the constant area if needed.
X86_64Assembler* assembler = GetAssembler();
if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
@@ -8137,7 +8188,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
}
// And finish up.
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
}
Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
@@ -8217,7 +8268,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
- dchecked_integral_cast<uint32_t>(address);
+ dchecked_integral_cast<uint32_t>(address);
}
void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dff2e799e0..5a940c1466 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -162,16 +162,16 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
Location GetFieldIndexLocation() const override {
return Location::RegisterLocation(RDI);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
return Location::RegisterLocation(RAX);
}
- Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance)
- const override {
+ Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
+ bool is_instance) const override {
return is_instance
? Location::RegisterLocation(RDX)
: Location::RegisterLocation(RSI);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
return Location::FpuRegisterLocation(XMM0);
}
@@ -468,7 +468,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void SetupBlockedRegisters() const override;
void DumpCoreRegister(std::ostream& stream, int reg) const override;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
InstructionSet GetInstructionSet() const override {
return InstructionSet::kX86_64;
@@ -502,9 +502,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
block_labels_ = CommonInitializeLabels<Label>();
}
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
- return false;
- }
+ bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index d759a16f48..33b5bd5169 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -16,6 +16,9 @@
#include "code_sinking.h"
+#include <sstream>
+
+#include "android-base/logging.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
@@ -335,10 +338,6 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
processed_instructions.ClearAllBits();
ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false);
post_dominated.ClearAllBits();
- ArenaBitVector instructions_that_can_move(
- &allocator, number_of_instructions, /* expandable= */ false);
- instructions_that_can_move.ClearAllBits();
- ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
// Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
// TODO(ngeoffray): Getting the full set of post-dominated should be done by
@@ -411,6 +410,13 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
HBasicBlock* common_dominator = finder.Get();
// Step (2): iterate over the worklist to find sinking candidates.
+ ArenaBitVector instructions_that_can_move(
+ &allocator, number_of_instructions, /* expandable= */ false);
+ instructions_that_can_move.ClearAllBits();
+ ScopedArenaVector<ScopedArenaVector<HInstruction*>> instructions_to_move(
+ graph_->GetBlocks().size(),
+ ScopedArenaVector<HInstruction*>(allocator.Adapter(kArenaAllocMisc)),
+ allocator.Adapter(kArenaAllocMisc));
while (!worklist.empty()) {
HInstruction* instruction = worklist.back();
if (processed_instructions.IsBitSet(instruction->GetId())) {
@@ -467,7 +473,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
// Instruction is a candidate for being sunk. Mark it as such, remove it from the
// work list, and add its inputs to the work list.
instructions_that_can_move.SetBit(instruction->GetId());
- move_in_order.push_back(instruction);
+ instructions_to_move[instruction->GetBlock()->GetBlockId()].push_back(instruction);
processed_instructions.SetBit(instruction->GetId());
worklist.pop_back();
AddInputs(instruction, processed_instructions, post_dominated, &worklist);
@@ -493,14 +499,50 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
}
}
- // Make sure we process instructions in dominated order. This is required for heap
- // stores.
- std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
- return b->StrictlyDominates(a);
- });
+ // We want to process the instructions in reverse dominated order. This is required for heap
+ // stores. To guarantee this (including the transitivity of incomparability) we have some extra
+ // bookkeeping.
+ ScopedArenaVector<HInstruction*> instructions_to_move_sorted(allocator.Adapter(kArenaAllocMisc));
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ const int block_id = block->GetBlockId();
+
+ // Order the block itself first.
+ std::sort(instructions_to_move[block_id].begin(),
+ instructions_to_move[block_id].end(),
+ [&block](HInstruction* a, HInstruction* b) {
+ return block->GetInstructions().FoundBefore(b, a);
+ });
+
+ for (HInstruction* instruction : instructions_to_move[block_id]) {
+ instructions_to_move_sorted.push_back(instruction);
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // We should have ordered the instructions in reverse dominated order. This means that
+ // instructions shouldn't dominate instructions that come after them in the vector.
+ for (size_t i = 0; i < instructions_to_move_sorted.size(); ++i) {
+ for (size_t j = i + 1; j < instructions_to_move_sorted.size(); ++j) {
+ if (instructions_to_move_sorted[i]->StrictlyDominates(instructions_to_move_sorted[j])) {
+ std::stringstream ss;
+ graph_->Dump(ss, nullptr);
+ ss << "\n"
+ << "{";
+ for (HInstruction* instr : instructions_to_move_sorted) {
+ ss << *instr << " in block: " << instr->GetBlock() << ", ";
+ }
+ ss << "}\n";
+ ss << "i = " << i << " which is " << *instructions_to_move_sorted[i]
+ << "strictly dominates j = " << j << " which is " << *instructions_to_move_sorted[j]
+ << "\n";
+ LOG(FATAL) << "Unexpected ordering of code sinking instructions: " << ss.str();
+ }
+ }
+ }
+ }
// Step (3): Try to move sinking candidates.
- for (HInstruction* instruction : move_in_order) {
+ for (HInstruction* instruction : instructions_to_move_sorted) {
HInstruction* position = nullptr;
if (instruction->IsArraySet()
|| instruction->IsInstanceFieldSet()
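
The code_sinking.cc hunk replaces one global std::sort whose comparator, StrictlyDominates, is not a strict weak ordering for incomparable instructions. Instead, candidates are bucketed per block, each bucket is sorted by reverse instruction order, and the buckets are emitted in post order, so dominated blocks come out before their dominators. A hedged sketch of that ordering scheme on toy data (all names below are invented):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Toy stand-in: an "instruction" is just (block id, position within the block).
    struct ToyInstruction {
      int block_id;
      int position;
    };

    // Emits sinking candidates so that no instruction precedes one it dominates: blocks are
    // visited in post order (dominated blocks before dominators) and, inside each block,
    // later instructions are emitted before earlier ones.
    std::vector<ToyInstruction> OrderForSinking(
        std::vector<std::vector<ToyInstruction>> per_block,
        const std::vector<int>& post_order_block_ids) {
      std::vector<ToyInstruction> result;
      for (int block_id : post_order_block_ids) {
        std::vector<ToyInstruction>& bucket = per_block[block_id];
        std::sort(bucket.begin(), bucket.end(),
                  [](const ToyInstruction& a, const ToyInstruction& b) {
                    return a.position > b.position;  // reverse order within the block
                  });
        result.insert(result.end(), bucket.begin(), bucket.end());
      }
      return result;
    }

    int main() {
      // Block 1 dominates block 0 in this toy CFG, so post order lists block 0 first.
      std::vector<std::vector<ToyInstruction>> per_block(2);
      per_block[1] = {{1, 3}, {1, 7}};
      per_block[0] = {{0, 2}};
      for (const ToyInstruction& insn : OrderForSinking(per_block, {0, 1})) {
        std::printf("b%d:i%d ", insn.block_id, insn.position);
      }
      std::printf("\n");  // prints: b0:i2 b1:i7 b1:i3
      return 0;
    }

This is the property the new kIsDebugBuild check verifies: no emitted instruction strictly dominates anything listed after it.
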
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 2d9acc49b3..c72d3ea24a 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -733,8 +733,7 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), DataType::Type::kInt32, nullptr);
codegen.GetMoveResolver()->EmitNativeCode(move);
- InternalCodeAllocator code_allocator;
- codegen.Finalize(&code_allocator);
+ codegen.Finalize();
}
#endif
@@ -785,8 +784,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
nullptr);
codegen.GetMoveResolver()->EmitNativeCode(move);
- InternalCodeAllocator code_allocator;
- codegen.Finalize(&code_allocator);
+ codegen.Finalize();
}
// Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off.
@@ -798,7 +796,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
codegen.Initialize();
- graph->SetHasSIMD(true);
+ graph->SetHasTraditionalSIMD(true);
for (int i = 0; i < 2; i++) {
HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator());
move->AddMove(Location::SIMDStackSlot(0),
@@ -818,11 +816,10 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
DataType::Type::kFloat64,
nullptr);
codegen.GetMoveResolver()->EmitNativeCode(move);
- graph->SetHasSIMD(false);
+ graph->SetHasTraditionalSIMD(false);
}
- InternalCodeAllocator code_allocator;
- codegen.Finalize(&code_allocator);
+ codegen.Finalize();
}
// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example).
@@ -867,7 +864,7 @@ TEST_F(CodegenTest, ARM64FrameSizeSIMD) {
arm64::CodeGeneratorARM64 codegen(graph, *compiler_options);
codegen.Initialize();
- graph->SetHasSIMD(true);
+ graph->SetHasTraditionalSIMD(true);
DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8);
vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers;
@@ -887,7 +884,8 @@ TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) {
arm64::CodeGeneratorARM64 codegen(graph, *compiler_options);
codegen.Initialize();
- graph->SetHasSIMD(false);
+ graph->SetHasTraditionalSIMD(false);
+ graph->SetHasPredicatedSIMD(false);
DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8);
vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers;
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 7af9d0f44c..a8425c9915 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -103,8 +103,8 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
blocked_core_registers_[arm::R7] = false;
}
- void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
- Location temp_loc ATTRIBUTE_UNUSED) override {
+ void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int code,
+ [[maybe_unused]] Location temp_loc) override {
// When turned on, the marking register checks in
// CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the
// Thread Register and the Marking Register to be set to
@@ -135,8 +135,8 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
: arm64::CodeGeneratorARM64(graph, compiler_options) {}
- void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED,
- Location temp_loc ATTRIBUTE_UNUSED) override {
+ void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int codem,
+ [[maybe_unused]] Location temp_loc) override {
// When turned on, the marking register checks in
// CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
// Thread Register and the Marking Register to be set to
@@ -167,28 +167,6 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
};
#endif
-class InternalCodeAllocator : public CodeAllocator {
- public:
- InternalCodeAllocator() : size_(0) { }
-
- uint8_t* Allocate(size_t size) override {
- size_ = size;
- memory_.reset(new uint8_t[size]);
- return memory_.get();
- }
-
- size_t GetSize() const { return size_; }
- ArrayRef<const uint8_t> GetMemory() const override {
- return ArrayRef<const uint8_t>(memory_.get(), size_);
- }
-
- private:
- size_t size_;
- std::unique_ptr<uint8_t[]> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
-};
-
static bool CanExecuteOnHardware(InstructionSet target_isa) {
return (target_isa == kRuntimeISA)
// Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2).
@@ -247,8 +225,7 @@ static void VerifyGeneratedCode(InstructionSet target_isa,
}
template <typename Expected>
-static void Run(const InternalCodeAllocator& allocator,
- const CodeGenerator& codegen,
+static void Run(const CodeGenerator& codegen,
bool has_result,
Expected expected) {
InstructionSet target_isa = codegen.GetInstructionSet();
@@ -260,7 +237,7 @@ static void Run(const InternalCodeAllocator& allocator,
};
CodeHolder code_holder;
const void* method_code =
- code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa);
+ code_holder.MakeExecutable(codegen.GetCode(), ArrayRef<const uint8_t>(), target_isa);
using fptr = Expected (*)();
fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code));
@@ -294,9 +271,8 @@ static void RunCodeNoCheck(CodeGenerator* codegen,
register_allocator->AllocateRegisters();
}
hook_before_codegen(graph);
- InternalCodeAllocator allocator;
- codegen->Compile(&allocator);
- Run(allocator, *codegen, has_result, expected);
+ codegen->Compile();
+ Run(*codegen, has_result, expected);
}
template <typename Expected>
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 20b0e38af5..e2ef8d52f2 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -311,10 +311,8 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction*
}
}
-inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
- HInstruction* instr) {
- if (constant->IsConstant()
- && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
+ if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant);
}
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 06d19e3f29..e20d9e83e6 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -37,6 +37,13 @@ class HConstantFoldingVisitor final : public HGraphDelegateVisitor {
void VisitUnaryOperation(HUnaryOperation* inst) override;
void VisitBinaryOperation(HBinaryOperation* inst) override;
+ // Tries to replace constants in binary operations like:
+ // * BinaryOp(Select(false_constant, true_constant, condition), other_constant), or
+ // * BinaryOp(other_constant, Select(false_constant, true_constant, condition))
+ // with consolidated constants. For example, Add(Select(10, 20, condition), 5) can be replaced
+ // with Select(15, 25, condition).
+ bool TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst);
+
void VisitArrayLength(HArrayLength* inst) override;
void VisitDivZeroCheck(HDivZeroCheck* inst) override;
void VisitIf(HIf* inst) override;
@@ -113,9 +120,69 @@ void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) {
if (constant != nullptr) {
inst->ReplaceWith(constant);
inst->GetBlock()->RemoveInstruction(inst);
+ } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) {
+ // Try to replace the select's inputs in Select+UnaryOperation cases. We can do this if both
+ // inputs to the select are constants, and this is the only use of the select.
+ HSelect* select = inst->InputAt(0)->AsSelect();
+ HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue());
+ if (false_constant == nullptr) {
+ return;
+ }
+ HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue());
+ if (true_constant == nullptr) {
+ return;
+ }
+ DCHECK_EQ(select->InputAt(0), select->GetFalseValue());
+ DCHECK_EQ(select->InputAt(1), select->GetTrueValue());
+ select->ReplaceInput(false_constant, 0);
+ select->ReplaceInput(true_constant, 1);
+ select->UpdateType();
+ inst->ReplaceWith(select);
+ inst->GetBlock()->RemoveInstruction(inst);
}
}
+bool HConstantFoldingVisitor::TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst) {
+ if (inst->GetLeft()->IsSelect() == inst->GetRight()->IsSelect()) {
+ // If both of them are constants, VisitBinaryOperation already tried the static evaluation. If
+ // both of them are selects, then we can't simplify.
+ // TODO(solanes): Technically, if both of them are selects we could simplify iff both selects'
+ // conditions are equal, e.g. Add(Select(1, 2, cond), Select(3, 4, cond)) could be replaced with
+ // Select(4, 6, cond). This seems very unlikely to happen so we don't implement it.
+ return false;
+ }
+
+ const bool left_is_select = inst->GetLeft()->IsSelect();
+ HSelect* select = left_is_select ? inst->GetLeft()->AsSelect() : inst->GetRight()->AsSelect();
+ HInstruction* maybe_constant = left_is_select ? inst->GetRight() : inst->GetLeft();
+
+ if (select->HasOnlyOneNonEnvironmentUse()) {
+ // Try to replace the select's inputs in Select+BinaryOperation. We can do this if both
+ // inputs to the select are constants, and this is the only use of the select.
+ HConstant* false_constant =
+ inst->TryStaticEvaluation(left_is_select ? select->GetFalseValue() : maybe_constant,
+ left_is_select ? maybe_constant : select->GetFalseValue());
+ if (false_constant == nullptr) {
+ return false;
+ }
+ HConstant* true_constant =
+ inst->TryStaticEvaluation(left_is_select ? select->GetTrueValue() : maybe_constant,
+ left_is_select ? maybe_constant : select->GetTrueValue());
+ if (true_constant == nullptr) {
+ return false;
+ }
+ DCHECK_EQ(select->InputAt(0), select->GetFalseValue());
+ DCHECK_EQ(select->InputAt(1), select->GetTrueValue());
+ select->ReplaceInput(false_constant, 0);
+ select->ReplaceInput(true_constant, 1);
+ select->UpdateType();
+ inst->ReplaceWith(select);
+ inst->GetBlock()->RemoveInstruction(inst);
+ return true;
+ }
+ return false;
+}
+
void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
// Constant folding: replace `op(a, b)' with a constant at
// compile time if `a' and `b' are both constants.
@@ -123,6 +190,8 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
if (constant != nullptr) {
inst->ReplaceWith(constant);
inst->GetBlock()->RemoveInstruction(inst);
+ } else if (TryRemoveBinaryOperationViaSelect(inst)) {
+ // Already replaced inside TryRemoveBinaryOperationViaSelect.
} else {
InstructionWithAbsorbingInputSimplifier simplifier(GetGraph());
inst->Accept(&simplifier);
@@ -299,6 +368,25 @@ void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
if (constant != nullptr) {
inst->ReplaceWith(constant);
inst->GetBlock()->RemoveInstruction(inst);
+ } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) {
+ // Try to replace the select's inputs in Select+TypeConversion. We can do this if both
+ // inputs to the select are constants, and this is the only use of the select.
+ HSelect* select = inst->InputAt(0)->AsSelect();
+ HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue());
+ if (false_constant == nullptr) {
+ return;
+ }
+ HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue());
+ if (true_constant == nullptr) {
+ return;
+ }
+ DCHECK_EQ(select->InputAt(0), select->GetFalseValue());
+ DCHECK_EQ(select->InputAt(1), select->GetTrueValue());
+ select->ReplaceInput(false_constant, 0);
+ select->ReplaceInput(true_constant, 1);
+ select->UpdateType();
+ inst->ReplaceWith(select);
+ inst->GetBlock()->RemoveInstruction(inst);
}
}
@@ -583,7 +671,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) {
block->RemoveInstruction(instruction);
}
- HConstant* cst_right = instruction->GetRight()->AsConstant();
+ HConstant* cst_right = instruction->GetRight()->AsConstantOrNull();
if (((cst_right != nullptr) &&
(cst_right->IsOne() || cst_right->IsMinusOne())) ||
(instruction->GetLeft() == instruction->GetRight())) {
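
The new TryRemoveBinaryOperationViaSelect above rewrites a binary operation over a two-constant Select into a Select over folded constants, e.g. Add(Select(10, 20, cond), 5) becomes Select(15, 25, cond). The arithmetic of that rewrite as a minimal sketch detached from the ART IR (the toy types are invented):

    #include <cstdint>

    // Toy IR node: a Select yields `false_value` when its condition is false, else `true_value`.
    struct ToySelect {
      int64_t false_value;
      int64_t true_value;
    };

    // Folds Add(select, constant) into a Select over the folded constants; this mirrors the
    // shape of the rewrite performed above.
    constexpr ToySelect FoldAddOverSelect(ToySelect select, int64_t constant) {
      return ToySelect{select.false_value + constant, select.true_value + constant};
    }

    // Add(Select(10, 20, cond), 5) -> Select(15, 25, cond).
    static_assert(FoldAddOverSelect(ToySelect{10, 20}, 5).false_value == 15, "folded false value");
    static_assert(FoldAddOverSelect(ToySelect{10, 20}, 5).true_value == 25, "folded true value");

For non-commutative operations the operand order matters, which is why the hunk threads left_is_select through both TryStaticEvaluation calls.
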
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
index d9b7652f32..48635cfd15 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -78,7 +78,7 @@ class CFREVisitor final : public HGraphVisitor {
VisitSetLocation(instruction, value);
}
- void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override {
+ void VisitDeoptimize([[maybe_unused]] HDeoptimize* instruction) override {
// Pessimize: Merge all fences.
MergeCandidateFences();
}
@@ -151,7 +151,7 @@ class CFREVisitor final : public HGraphVisitor {
}
}
- void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) {
+ void VisitSetLocation([[maybe_unused]] HInstruction* inst, HInstruction* store_input) {
// An object is considered "published" if it's stored onto the heap.
// Sidenote: A later "LSE" pass can still remove the fence if it proves the
// object doesn't actually escape.
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index cf49e39849..8e6b6db236 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -24,6 +24,7 @@
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "optimizing/nodes.h"
+#include "optimizing/nodes_vector.h"
#include "ssa_phi_elimination.h"
namespace art HIDDEN {
@@ -842,7 +843,8 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
void HDeadCodeElimination::UpdateGraphFlags() {
bool has_monitor_operations = false;
- bool has_simd = false;
+ bool has_traditional_simd = false;
+ bool has_predicated_simd = false;
bool has_bounds_checks = false;
bool has_always_throwing_invokes = false;
@@ -852,7 +854,12 @@ void HDeadCodeElimination::UpdateGraphFlags() {
if (instruction->IsMonitorOperation()) {
has_monitor_operations = true;
} else if (instruction->IsVecOperation()) {
- has_simd = true;
+ HVecOperation* vec_instruction = instruction->AsVecOperation();
+ if (vec_instruction->IsPredicated()) {
+ has_predicated_simd = true;
+ } else {
+ has_traditional_simd = true;
+ }
} else if (instruction->IsBoundsCheck()) {
has_bounds_checks = true;
} else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) {
@@ -862,7 +869,8 @@ void HDeadCodeElimination::UpdateGraphFlags() {
}
graph_->SetHasMonitorOperations(has_monitor_operations);
- graph_->SetHasSIMD(has_simd);
+ graph_->SetHasTraditionalSIMD(has_traditional_simd);
+ graph_->SetHasPredicatedSIMD(has_predicated_simd);
graph_->SetHasBoundsChecks(has_bounds_checks);
graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes);
}
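
The dead-code-elimination change above splits the single "has SIMD" graph flag into separate flags for predicated (mask-governed, e.g. SVE-style) and traditional fixed-width vector operations. A minimal sketch of the classification loop, with illustrative stand-in types rather than ART classes:

#include <cassert>
#include <vector>

// Stand-ins for HVecOperation and the graph flags; only the split matters here.
struct Op {
  bool is_vector;
  bool is_predicated;
};

struct GraphFlags {
  bool has_traditional_simd = false;
  bool has_predicated_simd = false;
};

GraphFlags ComputeSimdFlags(const std::vector<Op>& ops) {
  GraphFlags flags;
  for (const Op& op : ops) {
    if (!op.is_vector) continue;
    if (op.is_predicated) {
      flags.has_predicated_simd = true;
    } else {
      flags.has_traditional_simd = true;
    }
  }
  return flags;
}

int main() {
  GraphFlags f = ComputeSimdFlags({{true, false}, {false, false}, {true, true}});
  assert(f.has_traditional_simd && f.has_predicated_simd);
  return 0;
}
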
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 190b362145..31ba3fe98a 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -168,52 +168,68 @@ void GraphChecker::CheckGraphFlags() {
void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
current_block_ = block;
- // Use local allocator for allocating memory.
- ScopedArenaAllocator allocator(GetGraph()->GetArenaStack());
-
- // Check consistency with respect to predecessors of `block`.
- // Note: Counting duplicates with a sorted vector uses up to 6x less memory
- // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
- ScopedArenaVector<HBasicBlock*> sorted_predecessors(allocator.Adapter(kArenaAllocGraphChecker));
- sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end());
- std::sort(sorted_predecessors.begin(), sorted_predecessors.end());
- for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) {
- HBasicBlock* p = *it++;
- size_t p_count_in_block_predecessors = 1u;
- for (; it != end && *it == p; ++it) {
- ++p_count_in_block_predecessors;
- }
- size_t block_count_in_p_successors =
- std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
- if (p_count_in_block_predecessors != block_count_in_p_successors) {
- AddError(StringPrintf(
- "Block %d lists %zu occurrences of block %d in its predecessors, whereas "
- "block %d lists %zu occurrences of block %d in its successors.",
- block->GetBlockId(), p_count_in_block_predecessors, p->GetBlockId(),
- p->GetBlockId(), block_count_in_p_successors, block->GetBlockId()));
- }
- }
+ {
+ // Use a local arena for allocating memory. The explicit C++ scopes (i.e. `{}`) end the
+ // lifetime of each `ScopedArenaAllocator` as soon as possible so its memory can be reclaimed.
+ ScopedArenaAllocator allocator(GetGraph()->GetArenaStack());
- // Check consistency with respect to successors of `block`.
- // Note: Counting duplicates with a sorted vector uses up to 6x less memory
- // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
- ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker));
- sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end());
- std::sort(sorted_successors.begin(), sorted_successors.end());
- for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) {
- HBasicBlock* s = *it++;
- size_t s_count_in_block_successors = 1u;
- for (; it != end && *it == s; ++it) {
- ++s_count_in_block_successors;
+ {
+ // Check consistency with respect to predecessors of `block`.
+ // Note: Counting duplicates with a sorted vector uses up to 6x less memory
+ // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+ ScopedArenaVector<HBasicBlock*> sorted_predecessors(
+ allocator.Adapter(kArenaAllocGraphChecker));
+ sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end());
+ std::sort(sorted_predecessors.begin(), sorted_predecessors.end());
+ for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end;) {
+ HBasicBlock* p = *it++;
+ size_t p_count_in_block_predecessors = 1u;
+ for (; it != end && *it == p; ++it) {
+ ++p_count_in_block_predecessors;
+ }
+ size_t block_count_in_p_successors =
+ std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
+ if (p_count_in_block_predecessors != block_count_in_p_successors) {
+ AddError(StringPrintf(
+ "Block %d lists %zu occurrences of block %d in its predecessors, whereas "
+ "block %d lists %zu occurrences of block %d in its successors.",
+ block->GetBlockId(),
+ p_count_in_block_predecessors,
+ p->GetBlockId(),
+ p->GetBlockId(),
+ block_count_in_p_successors,
+ block->GetBlockId()));
+ }
+ }
}
- size_t block_count_in_s_predecessors =
- std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
- if (s_count_in_block_successors != block_count_in_s_predecessors) {
- AddError(StringPrintf(
- "Block %d lists %zu occurrences of block %d in its successors, whereas "
- "block %d lists %zu occurrences of block %d in its predecessors.",
- block->GetBlockId(), s_count_in_block_successors, s->GetBlockId(),
- s->GetBlockId(), block_count_in_s_predecessors, block->GetBlockId()));
+
+ {
+ // Check consistency with respect to successors of `block`.
+ // Note: Counting duplicates with a sorted vector uses up to 6x less memory
+ // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+ ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker));
+ sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end());
+ std::sort(sorted_successors.begin(), sorted_successors.end());
+ for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end;) {
+ HBasicBlock* s = *it++;
+ size_t s_count_in_block_successors = 1u;
+ for (; it != end && *it == s; ++it) {
+ ++s_count_in_block_successors;
+ }
+ size_t block_count_in_s_predecessors =
+ std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
+ if (s_count_in_block_successors != block_count_in_s_predecessors) {
+ AddError(
+ StringPrintf("Block %d lists %zu occurrences of block %d in its successors, whereas "
+ "block %d lists %zu occurrences of block %d in its predecessors.",
+ block->GetBlockId(),
+ s_count_in_block_successors,
+ s->GetBlockId(),
+ s->GetBlockId(),
+ block_count_in_s_predecessors,
+ block->GetBlockId()));
+ }
+ }
}
}
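
The predecessor/successor consistency checks above keep the sort-then-count idiom: copy the edge list, sort it, and walk runs of equal elements to get each element's multiplicity, reusing one flat buffer instead of a map (the source of the "up to 6x less memory" note). A self-contained sketch of that idiom:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

// Count multiplicities by sorting a copy and walking runs of equal values.
std::vector<std::pair<int, size_t>> CountMultiplicities(std::vector<int> ids) {
  std::sort(ids.begin(), ids.end());
  std::vector<std::pair<int, size_t>> counts;
  for (auto it = ids.begin(), end = ids.end(); it != end;) {
    int value = *it++;
    size_t count = 1u;
    for (; it != end && *it == value; ++it) {
      ++count;
    }
    counts.emplace_back(value, count);
  }
  return counts;
}

int main() {
  auto counts = CountMultiplicities({3, 1, 3, 2, 3});
  assert((counts == std::vector<std::pair<int, size_t>>{{1, 1}, {2, 1}, {3, 3}}));
  return 0;
}
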
@@ -587,21 +603,38 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
// Ensure 'instruction' has pointers to its inputs' use entries.
- auto&& input_records = instruction->GetInputRecords();
- for (size_t i = 0; i < input_records.size(); ++i) {
- const HUserRecord<HInstruction*>& input_record = input_records[i];
- HInstruction* input = input_record.GetInstruction();
- if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
- (input_record.GetUseNode() == input->GetUses().end()) ||
- !input->GetUses().ContainsNode(*input_record.GetUseNode()) ||
- (input_record.GetUseNode()->GetIndex() != i)) {
- AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry "
- "at input %u (%s:%d).",
- instruction->DebugName(),
- instruction->GetId(),
- static_cast<unsigned>(i),
- input->DebugName(),
- input->GetId()));
+ {
+ auto&& input_records = instruction->GetInputRecords();
+ for (size_t i = 0; i < input_records.size(); ++i) {
+ const HUserRecord<HInstruction*>& input_record = input_records[i];
+ HInstruction* input = input_record.GetInstruction();
+
+ // Populate bookkeeping, if needed. See comment in graph_checker.h for uses_per_instruction_.
+ auto it = uses_per_instruction_.find(input->GetId());
+ if (it == uses_per_instruction_.end()) {
+ it = uses_per_instruction_
+ .insert({input->GetId(),
+ ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>(
+ allocator_.Adapter(kArenaAllocGraphChecker))})
+ .first;
+ for (auto&& use : input->GetUses()) {
+ it->second.insert(std::addressof(use));
+ }
+ }
+
+ if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
+ (input_record.GetUseNode() == input->GetUses().end()) ||
+ (it->second.find(std::addressof(*input_record.GetUseNode())) == it->second.end()) ||
+ (input_record.GetUseNode()->GetIndex() != i)) {
+ AddError(
+ StringPrintf("Instruction %s:%d has an invalid iterator before use entry "
+ "at input %u (%s:%d).",
+ instruction->DebugName(),
+ instruction->GetId(),
+ static_cast<unsigned>(i),
+ input->DebugName(),
+ input->GetId()));
+ }
}
}
@@ -944,8 +977,7 @@ static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* in
static bool IsConstantEquivalent(const HInstruction* insn1,
const HInstruction* insn2,
BitVector* visited) {
- if (insn1->IsPhi() &&
- insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
+ if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
HConstInputsRef insn1_inputs = insn1->GetInputs();
HConstInputsRef insn2_inputs = insn2->GetInputs();
if (insn1_inputs.size() != insn2_inputs.size()) {
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index d6644f3b50..aff2358411 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -22,7 +22,7 @@
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/macros.h"
-#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
#include "nodes.h"
namespace art HIDDEN {
@@ -35,12 +35,13 @@ class GraphChecker : public HGraphDelegateVisitor {
explicit GraphChecker(HGraph* graph,
CodeGenerator* codegen = nullptr,
const char* dump_prefix = "art::GraphChecker: ")
- : HGraphDelegateVisitor(graph),
- errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)),
- dump_prefix_(dump_prefix),
- allocator_(graph->GetArenaStack()),
- seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker),
- codegen_(codegen) {
+ : HGraphDelegateVisitor(graph),
+ errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)),
+ dump_prefix_(dump_prefix),
+ allocator_(graph->GetArenaStack()),
+ seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker),
+ uses_per_instruction_(allocator_.Adapter(kArenaAllocGraphChecker)),
+ codegen_(codegen) {
seen_ids_.ClearAllBits();
}
@@ -107,7 +108,7 @@ class GraphChecker : public HGraphDelegateVisitor {
}
}
- protected:
+ private:
// Report a new error.
void AddError(const std::string& error) {
errors_.push_back(error);
@@ -118,7 +119,6 @@ class GraphChecker : public HGraphDelegateVisitor {
// Errors encountered while checking the graph.
ArenaVector<std::string> errors_;
- private:
void VisitReversePostOrder();
// Checks that the graph's flags are set correctly.
@@ -129,6 +129,13 @@ class GraphChecker : public HGraphDelegateVisitor {
ScopedArenaAllocator allocator_;
ArenaBitVector seen_ids_;
+ // As part of VisitInstruction, we verify that the instruction's input_record is present in the
+ // corresponding input's GetUses. If an instruction is used in many places (e.g. 200K+ uses), the
+ // linear search through GetUses is too slow. We can use bookkeeping to search in a set, instead
+ // of a list.
+ ScopedArenaSafeMap<int, ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>>
+ uses_per_instruction_;
+
// Used to access target information.
CodeGenerator* codegen_;
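
The new uses_per_instruction_ map trades a linear scan of an input's use list for a one-time snapshot into a set keyed by instruction id, so repeated membership checks become O(1). A minimal sketch of the caching pattern, with standard containers standing in for the arena-backed ScopedArenaSafeMap/ScopedArenaSet and a stand-in for HUseListNode:

#include <cassert>
#include <iterator>
#include <list>
#include <unordered_map>
#include <unordered_set>

struct UseNode { int user_id; };  // Stand-in for ART's HUseListNode.

using UseList = std::list<UseNode>;
using UseSetCache = std::unordered_map<int, std::unordered_set<const UseNode*>>;

bool ContainsUse(int input_id, const UseList& uses, const UseNode& node, UseSetCache& cache) {
  auto it = cache.find(input_id);
  if (it == cache.end()) {
    it = cache.emplace(input_id, std::unordered_set<const UseNode*>()).first;
    for (const UseNode& use : uses) {
      it->second.insert(&use);  // Populate the cache once per input.
    }
  }
  return it->second.count(&node) != 0;
}

int main() {
  UseList uses{{1}, {2}, {3}};
  UseSetCache cache;
  const UseNode& second = *std::next(uses.begin());
  UseNode foreign{2};
  assert(ContainsUse(42, uses, second, cache));
  assert(!ContainsUse(42, uses, foreign, cache));  // Same value, different node.
  return 0;
}
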
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 73bdd1e223..bd33fde907 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -610,6 +610,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override {
+ VisitVecOperation(vec_mem_operation);
StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString();
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 5a4478dc14..91be79f8ec 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -180,7 +180,7 @@ bool HInliner::Run() {
for (HBasicBlock* block : blocks) {
for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
HInstruction* next = instruction->GetNext();
- HInvoke* call = instruction->AsInvoke();
+ HInvoke* call = instruction->AsInvokeOrNull();
// As long as the call is not intrinsified, it is worth trying to inline.
if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) {
if (honor_noinline_directives) {
@@ -702,12 +702,14 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
// Walk over the class descriptors and look up the actual classes.
// If we cannot find a type we return kInlineCacheMissingTypes.
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+ Thread* self = Thread::Current();
for (const dex::TypeIndex& type_index : dex_pc_data.classes) {
const DexFile* dex_file = caller_compilation_unit_.GetDexFile();
const char* descriptor = pci->GetTypeDescriptor(dex_file, type_index);
- ObjPtr<mirror::ClassLoader> class_loader = caller_compilation_unit_.GetClassLoader().Get();
- ObjPtr<mirror::Class> clazz = class_linker->LookupResolvedType(descriptor, class_loader);
+ ObjPtr<mirror::Class> clazz =
+ class_linker->FindClass(self, descriptor, caller_compilation_unit_.GetClassLoader());
if (clazz == nullptr) {
+ self->ClearException(); // Clean up the exception left by type resolution.
VLOG(compiler) << "Could not find class from inline cache in AOT mode "
<< invoke_instruction->GetMethodReference().PrettyMethod()
<< " : "
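
The inliner change above switches inline-cache lookup from LookupResolvedType to FindClass, which may leave a pending exception when the class is missing, hence the added ClearException() before treating the entry as unresolvable. A generic sketch of that failure-handling pattern; the Resolver and Thread types below are hypothetical stand-ins, not ART's APIs:

#include <cassert>
#include <optional>
#include <string>

// Hypothetical stand-ins: a resolver that either returns a class id or
// records a pending exception on the current thread.
struct Thread {
  std::optional<std::string> pending_exception;
  void ClearException() { pending_exception.reset(); }
};

struct Resolver {
  std::optional<int> FindClass(Thread& self, const std::string& descriptor) {
    if (descriptor == "LKnown;") return 42;
    self.pending_exception = "NoClassDefFoundError: " + descriptor;
    return std::nullopt;
  }
};

// On failure, clear the exception and treat the cache entry as unresolvable
// instead of letting the error escape the compiler thread.
std::optional<int> ResolveForInlineCache(Resolver& r, Thread& self, const std::string& desc) {
  std::optional<int> klass = r.FindClass(self, desc);
  if (!klass.has_value()) {
    self.ClearException();  // Clean up the exception left by type resolution.
  }
  return klass;
}

int main() {
  Resolver r;
  Thread self;
  assert(ResolveForInlineCache(r, self, "LKnown;").has_value());
  assert(!ResolveForInlineCache(r, self, "LMissing;").has_value());
  assert(!self.pending_exception.has_value());  // No exception left behind.
  return 0;
}
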
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index fee9091145..fd599f789e 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1364,8 +1364,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc,
method_reference,
resolved_method,
resolved_method_reference,
- proto_idx,
- !graph_->IsDebuggable());
+ proto_idx);
if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) {
return false;
}
@@ -2365,9 +2364,9 @@ void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
second = LoadLocal(second_vreg_or_constant, type);
}
- if (!second_is_constant
- || (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0)
- || (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) {
+ if (!second_is_constant ||
+ (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) ||
+ (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) {
second = new (allocator_) HDivZeroCheck(second, dex_pc);
AppendInstruction(second);
}
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 0c2fd5de56..0e2a62226f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -1050,51 +1050,60 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
HInstruction* b = condition->InputAt(1);
DataType::Type t_type = true_value->GetType();
DataType::Type f_type = false_value->GetType();
- // Here we have a <cmp> b ? true_value : false_value.
- // Test if both values are compatible integral types (resulting MIN/MAX/ABS
- // type will be int or long, like the condition). Replacements are general,
- // but assume conditions prefer constants on the right.
if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) {
- // Allow a < 100 ? max(a, -100) : ..
- // or a > -100 ? min(a, 100) : ..
- // to use min/max instead of a to detect nested min/max expressions.
- HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value);
- if (new_a != nullptr) {
- a = new_a;
- }
- // Try to replace typical integral MIN/MAX/ABS constructs.
- if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) &&
- ((a == true_value && b == false_value) ||
- (b == true_value && a == false_value))) {
- // Found a < b ? a : b (MIN) or a < b ? b : a (MAX)
- // or a > b ? a : b (MAX) or a > b ? b : a (MIN).
- bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value);
- replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min);
- } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) ||
- ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) {
- bool negLeft = (cmp == kCondLT || cmp == kCondLE);
- HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0);
- HInstruction* not_negated = negLeft ? false_value : true_value;
- if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) {
- // Found a < 0 ? -a : a
- // or a > 0 ? a : -a
- // which can be replaced by ABS(a).
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select);
+ if (cmp == kCondEQ || cmp == kCondNE) {
+ // Turns
+ // * Select[a, b, EQ(a,b)] / Select[a, b, EQ(b,a)] into a
+ // * Select[a, b, NE(a,b)] / Select[a, b, NE(b,a)] into b
+ // Note that the order in EQ/NE is irrelevant.
+ if ((a == true_value && b == false_value) || (a == false_value && b == true_value)) {
+ replace_with = cmp == kCondEQ ? false_value : true_value;
+ }
+ } else {
+ // Test if both values are compatible integral types (resulting MIN/MAX/ABS
+ // type will be int or long, like the condition). Replacements are general,
+ // but assume conditions prefer constants on the right.
+
+ // Allow a < 100 ? max(a, -100) : ..
+ // or a > -100 ? min(a, 100) : ..
+ // to use min/max instead of a to detect nested min/max expressions.
+ HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value);
+ if (new_a != nullptr) {
+ a = new_a;
}
- } else if (true_value->IsSub() && false_value->IsSub()) {
- HInstruction* true_sub1 = true_value->InputAt(0);
- HInstruction* true_sub2 = true_value->InputAt(1);
- HInstruction* false_sub1 = false_value->InputAt(0);
- HInstruction* false_sub2 = false_value->InputAt(1);
- if ((((cmp == kCondGT || cmp == kCondGE) &&
- (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) ||
- ((cmp == kCondLT || cmp == kCondLE) &&
- (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) &&
- AreLowerPrecisionArgs(t_type, a, b)) {
- // Found a > b ? a - b : b - a
- // or a < b ? b - a : a - b
- // which can be replaced by ABS(a - b) for lower precision operands a, b.
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
+ // Try to replace typical integral MIN/MAX/ABS constructs.
+ if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) &&
+ ((a == true_value && b == false_value) || (b == true_value && a == false_value))) {
+ // Found a < b ? a : b (MIN) or a < b ? b : a (MAX)
+ // or a > b ? a : b (MAX) or a > b ? b : a (MIN).
+ bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value);
+ replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min);
+ } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) ||
+ ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) {
+ bool negLeft = (cmp == kCondLT || cmp == kCondLE);
+ HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0);
+ HInstruction* not_negated = negLeft ? false_value : true_value;
+ if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) {
+ // Found a < 0 ? -a : a
+ // or a > 0 ? a : -a
+ // which can be replaced by ABS(a).
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select);
+ }
+ } else if (true_value->IsSub() && false_value->IsSub()) {
+ HInstruction* true_sub1 = true_value->InputAt(0);
+ HInstruction* true_sub2 = true_value->InputAt(1);
+ HInstruction* false_sub1 = false_value->InputAt(0);
+ HInstruction* false_sub2 = false_value->InputAt(1);
+ if ((((cmp == kCondGT || cmp == kCondGE) &&
+ (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) ||
+ ((cmp == kCondLT || cmp == kCondLE) &&
+ (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) &&
+ AreLowerPrecisionArgs(t_type, a, b)) {
+ // Found a > b ? a - b : b - a
+ // or a < b ? b - a : a - b
+ // which can be replaced by ABS(a - b) for lower precision operands a, b.
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
+ }
}
}
}
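
The new EQ/NE case in VisitSelect collapses a select whose operands are exactly the compared values: on the "other" branch the two values are equal anyway, so one operand can always be returned. A source-level sketch of the equivalence the rewrite relies on:

#include <cassert>

int select_eq(int a, int b) { return (a == b) ? a : b; }   // Always yields b.
int select_ne(int a, int b) { return (a != b) ? a : b; }   // Always yields a.

int main() {
  for (int a : {-3, 0, 7}) {
    for (int b : {-3, 0, 7}) {
      assert(select_eq(a, b) == b);
      assert(select_ne(a, b) == a);
    }
  }
  return 0;
}
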
@@ -1456,24 +1465,26 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
}
}
- HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
- if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) {
- // Replace code looking like
- // NEG tmp, b
- // ADD dst, a, tmp
- // with
- // SUB dst, a, b
- // We do not perform the optimization if the input negation has environment
- // uses or multiple non-environment uses as it could lead to worse code. In
- // particular, we do not want the live range of `b` to be extended if we are
- // not sure the initial 'NEG' instruction can be removed.
- HInstruction* other = left_is_neg ? right : left;
- HSub* sub =
- new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput());
- instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
- RecordSimplification();
- neg->GetBlock()->RemoveInstruction(neg);
- return;
+ if (left_is_neg != right_is_neg) {
+ HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
+ if (neg->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, b
+ // ADD dst, a, tmp
+ // with
+ // SUB dst, a, b
+ // We do not perform the optimization if the input negation has environment
+ // uses or multiple non-environment uses as it could lead to worse code. In
+ // particular, we do not want the live range of `b` to be extended if we are
+ // not sure the initial 'NEG' instruction can be removed.
+ HInstruction* other = left_is_neg ? right : left;
+ HSub* sub =
+ new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
+ RecordSimplification();
+ neg->GetBlock()->RemoveInstruction(neg);
+ return;
+ }
}
if (TryReplaceWithRotate(instruction)) {
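
The restructured VisitAdd block above performs the same ADD(a, NEG(b)) to SUB(a, b) rewrite as before, now only fetching the NEG once it is known that exactly one side is a negation. The underlying identity at the value level (unsigned arithmetic keeps it free of signed-overflow concerns; the machine code is the same on two's-complement hardware):

#include <cassert>
#include <cstdint>

uint32_t add_of_neg(uint32_t a, uint32_t b) { return a + (0u - b); }  // ADD a, NEG b
uint32_t sub_direct(uint32_t a, uint32_t b) { return a - b; }         // SUB a, b

int main() {
  for (uint32_t a : {0u, 1u, 123456789u, 0xFFFFFFFFu}) {
    for (uint32_t b : {0u, 1u, 987654321u, 0xFFFFFFFFu}) {
      assert(add_of_neg(a, b) == sub_direct(a, b));
    }
  }
  return 0;
}
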
@@ -1676,7 +1687,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
HInstruction* input_two = condition->InputAt(1);
HLoadClass* load_class = input_one->IsLoadClass()
? input_one->AsLoadClass()
- : input_two->AsLoadClass();
+ : input_two->AsLoadClassOrNull();
if (load_class == nullptr) {
return false;
}
@@ -1688,8 +1699,8 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
}
HInstanceFieldGet* field_get = (load_class == input_one)
- ? input_two->AsInstanceFieldGet()
- : input_one->AsInstanceFieldGet();
+ ? input_two->AsInstanceFieldGetOrNull()
+ : input_one->AsInstanceFieldGetOrNull();
if (field_get == nullptr) {
return false;
}
@@ -2240,6 +2251,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
}
if (left->IsAdd()) {
+ // Cases (x + y) - y = x, and (x + y) - x = y.
// Replace code patterns looking like
// ADD dst1, x, y ADD dst1, x, y
// SUB dst2, dst1, y SUB dst2, dst1, x
@@ -2248,14 +2260,75 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
// SUB instruction is not needed in this case, we may use
// one of inputs of ADD instead.
// It is applicable to integral types only.
+ HAdd* add = left->AsAdd();
DCHECK(DataType::IsIntegralType(type));
- if (left->InputAt(1) == right) {
- instruction->ReplaceWith(left->InputAt(0));
+ if (add->GetRight() == right) {
+ instruction->ReplaceWith(add->GetLeft());
+ RecordSimplification();
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ } else if (add->GetLeft() == right) {
+ instruction->ReplaceWith(add->GetRight());
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
return;
- } else if (left->InputAt(0) == right) {
- instruction->ReplaceWith(left->InputAt(1));
+ }
+ } else if (right->IsAdd()) {
+ // Cases y - (x + y) = -x, and x - (x + y) = -y.
+ // Replace code patterns looking like
+ // ADD dst1, x, y ADD dst1, x, y
+ // SUB dst2, y, dst1 SUB dst2, x, dst1
+ // with
+ // ADD dst1, x, y ADD dst1, x, y
+ // NEG x NEG y
+ // SUB instruction is not needed in this case, we may use
+ // one of the inputs of the ADD instead, with a NEG.
+ // It is applicable to integral types only.
+ HAdd* add = right->AsAdd();
+ DCHECK(DataType::IsIntegralType(type));
+ if (add->GetRight() == left) {
+ HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetLeft());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ } else if (add->GetLeft() == left) {
+ HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetRight());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ }
+ } else if (left->IsSub()) {
+ // Case (x - y) - x = -y.
+ // Replace code patterns looking like
+ // SUB dst1, x, y
+ // SUB dst2, dst1, x
+ // with
+ // SUB dst1, x, y
+ // NEG y
+ // The second SUB is not needed in this case, we may use the second input of the first SUB
+ // instead, with a NEG.
+ // It is applicable to integral types only.
+ HSub* sub = left->AsSub();
+ DCHECK(DataType::IsIntegralType(type));
+ if (sub->GetLeft() == right) {
+ HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(sub->GetType(), sub->GetRight());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ }
+ } else if (right->IsSub()) {
+ // Case x - (x - y) = y.
+ // Replace code patterns looking like
+ // SUB dst1, x, y
+ // SUB dst2, x, dst1
+ // with
+ // SUB dst1, x, y
+ // The second SUB is not needed in this case, we may use the second input of the first SUB.
+ // It is applicable to integral types only.
+ HSub* sub = right->AsSub();
+ DCHECK(DataType::IsIntegralType(type));
+ if (sub->GetLeft() == left) {
+ instruction->ReplaceWith(sub->GetRight());
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
return;
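
The extended VisitSub cases above are all instances of simple ADD/SUB cancellation. Written out as scalar identities under wrapping unsigned arithmetic (which matches the compiler's integral semantics):

#include <cassert>
#include <cstdint>

uint32_t add_then_sub_rhs(uint32_t x, uint32_t y) { return (x + y) - y; }  // == x
uint32_t add_then_sub_lhs(uint32_t x, uint32_t y) { return (x + y) - x; }  // == y
uint32_t sub_from_add_rhs(uint32_t x, uint32_t y) { return y - (x + y); }  // == -x (NEG x)
uint32_t sub_from_add_lhs(uint32_t x, uint32_t y) { return x - (x + y); }  // == -y (NEG y)
uint32_t sub_then_sub(uint32_t x, uint32_t y) { return (x - y) - x; }      // == -y (NEG y)
uint32_t sub_of_sub(uint32_t x, uint32_t y) { return x - (x - y); }        // == y

int main() {
  for (uint32_t x : {0u, 5u, 0xDEADBEEFu}) {
    for (uint32_t y : {0u, 7u, 0xFFFFFFFFu}) {
      assert(add_then_sub_rhs(x, y) == x);
      assert(add_then_sub_lhs(x, y) == y);
      assert(sub_from_add_rhs(x, y) == 0u - x);
      assert(sub_from_add_lhs(x, y) == 0u - y);
      assert(sub_then_sub(x, y) == 0u - y);
      assert(sub_of_sub(x, y) == y);
    }
  }
  return 0;
}
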
@@ -3215,7 +3288,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
HInstruction* left = instruction->GetLeft();
HInstruction* right = instruction->GetRight();
// Variable names as described above.
- HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant();
+ HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstantOrNull();
if (const2 == nullptr) {
return false;
}
@@ -3231,7 +3304,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
}
left = y->GetLeft();
- HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant();
+ HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstantOrNull();
if (const1 == nullptr) {
return false;
}
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index ddc3a867b8..01489f8bcb 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -54,7 +54,7 @@ inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) {
// t3 = Sub(*, t2)
inline bool IsSubRightSubLeftShl(HSub *sub) {
HInstruction* right = sub->GetRight();
- return right->IsSub() && right->AsSub()->GetLeft()->IsShl();;
+ return right->IsSub() && right->AsSub()->GetLeft()->IsShl();
}
} // namespace helpers
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 774deec438..8357e57c1f 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -27,6 +27,7 @@
#include "gc/space/image_space.h"
#include "image-inl.h"
#include "intrinsic_objects.h"
+#include "intrinsics_list.h"
#include "nodes.h"
#include "obj_ptr-inl.h"
#include "scoped_thread_state_change-inl.h"
@@ -43,10 +44,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
case Intrinsics::k ## Name: \
os << # Name; \
break;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef STATIC_INTRINSICS_LIST
-#undef VIRTUAL_INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return os;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 893cd04411..b6c7e1b997 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "code_generator.h"
+#include "intrinsics_list.h"
#include "nodes.h"
#include "optimization.h"
#include "parallel_move_resolver.h"
@@ -48,9 +49,7 @@ class IntrinsicVisitor : public ValueObject {
case Intrinsics::k ## Name: \
Visit ## Name(invoke); \
return;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Do not put a default case. That way the compiler will complain if we missed a case.
@@ -60,11 +59,8 @@ class IntrinsicVisitor : public ValueObject {
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, ...) \
- virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
- }
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ virtual void Visit##Name([[maybe_unused]] HInvoke* invoke) {}
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
static void MoveArguments(HInvoke* invoke,
@@ -254,11 +250,9 @@ class VarHandleOptimizations : public IntrinsicOptimizations {
// intrinsic to exploit e.g. no side-effects or exceptions, but otherwise not handled
// by this architecture-specific intrinsics code generator. Eventually it is implemented
// as a true method call.
-#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \
-void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-} \
-void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}
+#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \
+ void IntrinsicLocationsBuilder##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} \
+ void IntrinsicCodeGenerator##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {}
// Defines a list of unreached intrinsics: that is, method calls that are recognized as
// an intrinsic, and then always converted into HIR instructions before they reach any
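
The headers in this patch replace the include-then-INTRINSICS_LIST-then-#undef dance with a single ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) expansion, the classic X-macro technique: the list is defined once, and each expansion site supplies a per-entry macro. A small self-contained illustration (the demo list and names are made up, not the real intrinsics list):

#include <iostream>

#define DEMO_INTRINSICS_LIST(V) \
  V(MathAbsInt)                 \
  V(StringLength)               \
  V(ThreadCurrentThread)

enum class DemoIntrinsic {
#define DEFINE_ENUMERATOR(Name) k##Name,
  DEMO_INTRINSICS_LIST(DEFINE_ENUMERATOR)
#undef DEFINE_ENUMERATOR
};

const char* DemoIntrinsicName(DemoIntrinsic intrinsic) {
  switch (intrinsic) {
#define DEFINE_CASE(Name)          \
    case DemoIntrinsic::k##Name:   \
      return #Name;
    DEMO_INTRINSICS_LIST(DEFINE_CASE)
#undef DEFINE_CASE
  }
  return "<unknown>";
}

int main() {
  std::cout << DemoIntrinsicName(DemoIntrinsic::kStringLength) << '\n';  // Prints StringLength.
  return 0;
}
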
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d2dbaa32e3..2ec2134fb1 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2582,7 +2582,7 @@ static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
uint32_t at,
HInstruction* input) {
- HIntConstant* const_input = input->AsIntConstant();
+ HIntConstant* const_input = input->AsIntConstantOrNull();
if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
locations->SetInAt(at, Location::RequiresRegister());
} else {
@@ -2593,8 +2593,8 @@ static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -2605,7 +2605,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
// The length must be >= 0 and not so long that we would (currently) prefer libcore's
// native implementation.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0 || len > kSystemArrayCopyCharThreshold) {
@@ -2903,8 +2903,8 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -2914,7 +2914,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// The length must be >= 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0 || len >= kSystemArrayCopyThreshold) {
@@ -3009,8 +3009,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
- DCHECK(!optimizations.GetDestinationIsSource()
- || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+ DCHECK(!optimizations.GetDestinationIsSource() ||
+ (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Cmp(src, dest);
@@ -3676,7 +3676,7 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
@@ -4711,8 +4711,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
LocationFrom(target.object),
method.X(),
ArtField::DeclaringClassOffset().Int32Value(),
- /*fixup_label=*/ nullptr,
- gCompilerReadBarrierOption);
+ /*fixup_label=*/nullptr,
+ GetCompilerReadBarrierOption());
}
}
} else {
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index a0ccf87f7b..b20cea65f4 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
namespace vixl {
namespace aarch64 {
@@ -47,9 +48,7 @@ class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -72,9 +71,7 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 266b5bc799..d31593cf9f 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1252,9 +1252,9 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
return;
}
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
locations->SetInAt(1, Location::RequiresRegister());
@@ -2653,7 +2653,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
@@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
LocationFrom(target.object),
method,
ArtField::DeclaringClassOffset().Int32Value(),
- gCompilerReadBarrierOption);
+ GetCompilerReadBarrierOption());
}
}
} else {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 54475bcc7e..f517d21c9d 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "utils/arm/assembler_arm_vixl.h"
namespace art HIDDEN {
@@ -36,9 +37,7 @@ class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -63,9 +62,7 @@ class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
new file mode 100644
index 0000000000..668b3862ad
--- /dev/null
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_riscv64.h"
+
+#include "code_generator_riscv64.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ LocationSummary* res = invoke->GetLocations();
+ if (res == nullptr) {
+ return false;
+ }
+ return res->Intrinsified();
+}
+
+Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() {
+ return codegen_->GetAssembler();
+}
+
+#define __ GetAssembler()->
+
+static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>());
+ __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+ __ Snez(out, out);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>());
+ __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+ __ Snez(out, out);
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+template <typename EmitOp>
+void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) {
+ LocationSummary* locations = invoke->GetLocations();
+ emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); });
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+template <typename EmitOp>
+void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) {
+ LocationSummary* locations = invoke->GetLocations();
+ emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); });
+}
+
+template <typename EmitOp>
+void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) {
+ LocationSummary* locations = invoke->GetLocations();
+ emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ // There is no 32-bit reverse bytes instruction.
+ __ Rev8(rd, rs1);
+ __ Srai(rd, rd, 32);
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Rev8(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ // There is no 16-bit reverse bytes instruction.
+ __ Rev8(rd, rs1);
+ __ Srai(rd, rd, 48);
+ });
+}
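
The 32- and 16-bit reverse-bytes intrinsics above reuse the 64-bit Rev8: after reversing all eight bytes of the zero-extended value, the interesting bytes sit in the upper part of the register, and an arithmetic shift right by 32 (or 48) moves them down while producing the sign-extended result expected for int/short. A host-side sketch of the same trick using the GCC/Clang byte-swap builtin (assumed available; arithmetic right shift of negative values is guaranteed since C++20):

#include <cassert>
#include <cstdint>

int32_t ReverseBytes32(int32_t x) {
  uint64_t reversed = __builtin_bswap64(static_cast<uint64_t>(static_cast<uint32_t>(x)));  // Rev8
  return static_cast<int32_t>(static_cast<int64_t>(reversed) >> 32);                       // Srai rd, rd, 32
}

int16_t ReverseBytes16(int16_t x) {
  uint64_t reversed = __builtin_bswap64(static_cast<uint64_t>(static_cast<uint16_t>(x)));  // Rev8
  return static_cast<int16_t>(static_cast<int64_t>(reversed) >> 48);                       // Srai rd, rd, 48
}

int main() {
  assert(ReverseBytes32(0x12345678) == 0x78563412);
  assert(static_cast<uint32_t>(ReverseBytes32(0x000000FF)) == 0xFF000000u);
  assert(ReverseBytes16(0x1234) == 0x3412);
  return 0;
}
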
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+ __ Clzw(tmp, rs1);
+ __ Li(tmp2, INT64_C(-0x80000000));
+ __ Srlw(tmp2, tmp2, tmp);
+ __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero.
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+ __ Clz(tmp, rs1);
+ __ Li(tmp2, INT64_C(-0x8000000000000000));
+ __ Srl(tmp2, tmp2, tmp);
+ __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero.
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ NegW(tmp, rs1);
+ __ And(rd, rs1, tmp);
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Neg(tmp, rs1);
+ __ And(rd, rs1, tmp);
+ });
+}
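
The HighestOneBit intrinsics above count leading zeros, shift a top-bit mask right by that amount, and AND with the input (the final AND keeps a zero input mapping to zero); LowestOneBit is the classic x & -x. A C++20 sketch of the 32-bit versions, with std::countl_zero standing in for Clzw and the mask shift done in 64 bits so a zero input yields an empty mask:

#include <bit>
#include <cassert>
#include <cstdint>

uint32_t HighestOneBit32(uint32_t x) {
  int lz = std::countl_zero(x);                 // Clzw; defined for zero (returns 32).
  uint64_t mask = uint64_t{0x80000000u} >> lz;  // Srlw of the 0x80000000 constant.
  return static_cast<uint32_t>(mask) & x;       // And; selects the highest set bit.
}

uint32_t LowestOneBit32(uint32_t x) {
  return x & (0u - x);                          // NegW + And.
}

int main() {
  assert(HighestOneBit32(0u) == 0u);
  assert(HighestOneBit32(1u) == 1u);
  assert(HighestOneBit32(0x00F00012u) == 0x00800000u);
  assert(LowestOneBit32(0u) == 0u);
  assert(LowestOneBit32(0x00F00012u) == 0x00000002u);
  return 0;
}
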
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+ CreateIntToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); });
+}
+
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
+
+UNREACHABLE_INTRINSICS(RISCV64)
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_riscv64.h b/compiler/optimizing/intrinsics_riscv64.h
new file mode 100644
index 0000000000..49c057de2b
--- /dev/null
+++ b/compiler/optimizing/intrinsics_riscv64.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_
+
+#include "base/macros.h"
+#include "intrinsics.h"
+#include "intrinsics_list.h"
+
+namespace art HIDDEN {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace riscv64 {
+
+class CodeGeneratorRISCV64;
+class Riscv64Assembler;
+
+class IntrinsicLocationsBuilderRISCV64 final : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicLocationsBuilderRISCV64(ArenaAllocator* allocator,
+ CodeGeneratorRISCV64* codegen)
+ : allocator_(allocator), codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, ...) \
+ void Visit##Name(HInvoke* invoke) override;
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ ArenaAllocator* const allocator_;
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderRISCV64);
+};
+
+class IntrinsicCodeGeneratorRISCV64 final : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicCodeGeneratorRISCV64(CodeGeneratorRISCV64* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, ...) \
+ void Visit##Name(HInvoke* invoke);
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ Riscv64Assembler* GetAssembler();
+
+ ArenaAllocator* GetAllocator();
+
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorRISCV64);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d2072201f8..02f312e74e 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -394,7 +394,6 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
}
HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -774,9 +773,9 @@ void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
// We need at least two of the positions or length to be an integer constant,
// or else we won't have enough free registers.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
int num_constants =
((src_pos != nullptr) ? 1 : 0)
@@ -1205,7 +1204,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
HInstruction* code_point = invoke->InputAt(1);
if (code_point->IsIntConstant()) {
if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
- std::numeric_limits<uint16_t>::max()) {
+ std::numeric_limits<uint16_t>::max()) {
// Always needs the slow-path. We could directly dispatch to it, but this case should be
// rare, so for simplicity just put the full slow-path down and branch unconditionally.
slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
@@ -1445,7 +1444,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register obj = locations->InAt(0).AsRegister<Register>();
Location srcBegin = locations->InAt(1);
int srcBegin_value =
- srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
+ srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
Register srcEnd = locations->InAt(2).AsRegister<Register>();
Register dst = locations->InAt(3).AsRegister<Register>();
Register dstBegin = locations->InAt(4).AsRegister<Register>();
@@ -3504,7 +3503,7 @@ void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
LocationSummary* locations = new (allocator_) LocationSummary(invoke,
@@ -3781,7 +3780,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
Location::RegisterLocation(temp),
Address(temp, declaring_class_offset),
/* fixup_label= */ nullptr,
- gCompilerReadBarrierOption);
+ GetCompilerReadBarrierOption());
return temp;
}
@@ -3860,7 +3859,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
Address field_addr(ref, offset, TIMES_1, 0);
// Load the value from the field
- if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) {
+ if (type == DataType::Type::kReference && GetCompilerReadBarrierOption() == kWithReadBarrier) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, ref, field_addr, /* needs_null_check= */ false);
} else if (type == DataType::Type::kInt64 &&
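The As*OrNull() conversions introduced above capture the convention used throughout this change: the nullable cast is for inputs that may or may not be of the queried kind, while the plain As*() cast is reserved for paths where the kind is already established. A small sketch of the two idioms, with invented helper and variable names:

  // Sketch of the two idioms (UseKnownValue() is a placeholder).
  HInstruction* input = invoke->InputAt(1);

  // "Maybe a constant": returns nullptr if the input is not an HIntConstant.
  if (HIntConstant* maybe_const = input->AsIntConstantOrNull()) {
    UseKnownValue(maybe_const->GetValue());
  }

  // "Known to be a constant": guarded by IsIntConstant(), so the checked cast is safe.
  if (input->IsIntConstant()) {
    UseKnownValue(input->AsIntConstant()->GetValue());
  }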
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index 77c236d244..fc2f0e3fbd 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
namespace art HIDDEN {
@@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
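The switch from the include/INTRINSICS_LIST/undef sequence to a single ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) application is the standard X-macro pattern: the list header defines one macro that applies its argument to every intrinsic, so the per-file boilerplate shrinks to defining and undefining the callback. Schematically, with invented intrinsic names:

  // Illustrative X-macro expansion (list contents invented):
  //   #define EXAMPLE_LIST(V) V(MathAbsInt) V(StringLength)
  //   #define OPTIMIZING_INTRINSICS(Name, ...) void Visit##Name(HInvoke* invoke) override;
  //   EXAMPLE_LIST(OPTIMIZING_INTRINSICS)
  //   #undef OPTIMIZING_INTRINSICS
  // expands to:
  //   void VisitMathAbsInt(HInvoke* invoke) override;
  //   void VisitStringLength(HInvoke* invoke) override;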
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9d0d5f155e..842af6b73f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -222,34 +222,34 @@ static void GenIsInfinite(LocationSummary* locations,
double kPositiveInfinity = std::numeric_limits<double>::infinity();
double kNegativeInfinity = -1 * kPositiveInfinity;
- __ xorq(output, output);
- __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
- __ j(kNotEqual, &done1);
- __ j(kParityEven, &done2);
- __ movq(output, Immediate(1));
- __ jmp(&done2);
- __ Bind(&done1);
- __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
- __ j(kNotEqual, &done2);
- __ j(kParityEven, &done2);
- __ movq(output, Immediate(1));
- __ Bind(&done2);
+ __ xorq(output, output);
+ __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
+ __ j(kNotEqual, &done1);
+ __ j(kParityEven, &done2);
+ __ movq(output, Immediate(1));
+ __ jmp(&done2);
+ __ Bind(&done1);
+ __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
+ __ j(kNotEqual, &done2);
+ __ j(kParityEven, &done2);
+ __ movq(output, Immediate(1));
+ __ Bind(&done2);
} else {
float kPositiveInfinity = std::numeric_limits<float>::infinity();
float kNegativeInfinity = -1 * kPositiveInfinity;
- __ xorl(output, output);
- __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
- __ j(kNotEqual, &done1);
- __ j(kParityEven, &done2);
- __ movl(output, Immediate(1));
- __ jmp(&done2);
- __ Bind(&done1);
- __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
- __ j(kNotEqual, &done2);
- __ j(kParityEven, &done2);
- __ movl(output, Immediate(1));
- __ Bind(&done2);
+ __ xorl(output, output);
+ __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
+ __ j(kNotEqual, &done1);
+ __ j(kParityEven, &done2);
+ __ movl(output, Immediate(1));
+ __ jmp(&done2);
+ __ Bind(&done1);
+ __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
+ __ j(kNotEqual, &done2);
+ __ j(kParityEven, &done2);
+ __ movl(output, Immediate(1));
+ __ Bind(&done2);
}
}
@@ -617,8 +617,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -628,7 +628,7 @@ static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
}
// The length must be > 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0) {
@@ -1424,7 +1424,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
HInstruction* code_point = invoke->InputAt(1);
if (code_point->IsIntConstant()) {
if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
- std::numeric_limits<uint16_t>::max()) {
+ std::numeric_limits<uint16_t>::max()) {
// Always needs the slow-path. We could directly dispatch to it, but this case should be
// rare, so for simplicity just put the full slow-path down and branch unconditionally.
slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
@@ -1655,7 +1655,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
Location srcBegin = locations->InAt(1);
int srcBegin_value =
- srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
+ srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
@@ -1871,7 +1871,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
static void GenUnsafeGet(HInvoke* invoke,
DataType::Type type,
- bool is_volatile ATTRIBUTE_UNUSED,
+ [[maybe_unused]] bool is_volatile,
CodeGeneratorX86_64* codegen) {
X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
LocationSummary* locations = invoke->GetLocations();
@@ -3249,7 +3249,7 @@ void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
LocationSummary* locations =
@@ -3770,8 +3770,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
instr_codegen->GenerateGcRootFieldLoad(invoke,
Location::RegisterLocation(target.object),
Address(method, ArtField::DeclaringClassOffset()),
- /*fixup_label=*/ nullptr,
- gCompilerReadBarrierOption);
+ /*fixup_label=*/nullptr,
+ GetCompilerReadBarrierOption());
}
}
} else {
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 59fe815a94..d0ee6f622d 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
namespace art HIDDEN {
@@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
diff --git a/compiler/optimizing/jit_patches_arm64.cc b/compiler/optimizing/jit_patches_arm64.cc
new file mode 100644
index 0000000000..76ba182acb
--- /dev/null
+++ b/compiler/optimizing/jit_patches_arm64.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generation_data.h"
+#include "gc_root.h"
+#include "jit_patches_arm64.h"
+
+namespace art HIDDEN {
+
+namespace arm64 {
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateUint32Literal(
+ uint32_t value) {
+ return uint32_literals_.GetOrCreate(
+ value,
+ [this, value]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(value);
+ });
+}
+
+vixl::aarch64::Literal<uint64_t>* JitPatchesARM64::DeduplicateUint64Literal(
+ uint64_t value) {
+ return uint64_literals_.GetOrCreate(
+ value,
+ [this, value]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint64_t>(value);
+ });
+}
+
+static void PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ vixl::aarch64::Literal<uint32_t>* literal,
+ uint64_t index_in_table) {
+ uint32_t literal_offset = literal->GetOffset();
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+ uint8_t* data = code + literal_offset;
+ reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
+void JitPatchesARM64::EmitJitRootPatches(
+ uint8_t* code,
+ const uint8_t* roots_data,
+ const CodeGenerationData& code_generation_data) const {
+ for (const auto& entry : jit_string_patches_) {
+ const StringReference& string_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ uint64_t index_in_table = code_generation_data.GetJitStringRootIndex(string_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+ for (const auto& entry : jit_class_patches_) {
+ const TypeReference& type_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ uint64_t index_in_table = code_generation_data.GetJitClassRootIndex(type_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+}
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateBootImageAddressLiteral(
+ uint64_t address) {
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
+}
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitStringLiteral(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle,
+ CodeGenerationData* code_generation_data) {
+ code_generation_data->ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
+ return jit_string_patches_.GetOrCreate(
+ StringReference(&dex_file, string_index),
+ [this]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
+ });
+}
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitClassLiteral(
+ const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle,
+ CodeGenerationData* code_generation_data) {
+ code_generation_data->ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
+ return jit_class_patches_.GetOrCreate(
+ TypeReference(&dex_file, type_index),
+ [this]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
+ });
+}
+
+} // namespace arm64
+} // namespace art
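To make the patching step above concrete: each deduplicated literal is emitted into the code as a placeholder word, and EmitJitRootPatches later overwrites it with the address of the corresponding slot in the JIT roots table. A worked example with invented numbers, assuming GcRoot<mirror::Object> is a 4-byte compressed reference:

  // Invented values, for illustration only.
  //   roots_data          = 0x70000000
  //   index_in_table      = 3
  //   sizeof(GcRoot<...>) = 4
  //   address             = 0x70000000 + 3 * 4 = 0x7000000c
  // PatchJitRootUse() then writes 0x7000000c over the 32-bit literal at
  // code + literal->GetOffset(), so the generated load reads the GcRoot slot
  // (which the GC can update) instead of a baked-in object pointer.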
diff --git a/compiler/optimizing/jit_patches_arm64.h b/compiler/optimizing/jit_patches_arm64.h
new file mode 100644
index 0000000000..f928723f58
--- /dev/null
+++ b/compiler/optimizing/jit_patches_arm64.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_
+#define ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_
+
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "dex/dex_file.h"
+#include "dex/string_reference.h"
+#include "dex/type_reference.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "mirror/string.h"
+#include "utils/arm64/assembler_arm64.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
+
+namespace art HIDDEN {
+
+class CodeGenerationData;
+
+namespace arm64 {
+
+/**
+ * Helper for emitting string or class literals into JIT-generated code; it can
+ * be shared between different compilers.
+ */
+class JitPatchesARM64 {
+ public:
+ JitPatchesARM64(Arm64Assembler* assembler, ArenaAllocator* allocator) :
+ assembler_(assembler),
+ uint32_literals_(std::less<uint32_t>(),
+ allocator->Adapter(kArenaAllocCodeGenerator)),
+ uint64_literals_(std::less<uint64_t>(),
+ allocator->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(StringReferenceValueComparator(),
+ allocator->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(TypeReferenceValueComparator(),
+ allocator->Adapter(kArenaAllocCodeGenerator)) {
+ }
+
+ using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
+ using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
+ using StringToLiteralMap = ArenaSafeMap<StringReference,
+ vixl::aarch64::Literal<uint32_t>*,
+ StringReferenceValueComparator>;
+ using TypeToLiteralMap = ArenaSafeMap<TypeReference,
+ vixl::aarch64::Literal<uint32_t>*,
+ TypeReferenceValueComparator>;
+
+ vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle,
+ CodeGenerationData* code_generation_data);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(
+ const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle,
+ CodeGenerationData* code_generation_data);
+
+ void EmitJitRootPatches(uint8_t* code,
+ const uint8_t* roots_data,
+ const CodeGenerationData& code_generation_data) const;
+
+ Arm64Assembler* GetAssembler() const { return assembler_; }
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+
+ private:
+ Arm64Assembler* assembler_;
+ // Deduplication map for 32-bit literals, used by the JIT for boot image addresses.
+ Uint32ToLiteralMap uint32_literals_;
+ // Deduplication map for 64-bit literals, used by the JIT for method addresses or method code.
+ Uint64ToLiteralMap uint64_literals_;
+ // Patches for string literals in JIT compiled code.
+ StringToLiteralMap jit_string_patches_;
+ // Patches for class literals in JIT compiled code.
+ TypeToLiteralMap jit_class_patches_;
+};
+
+} // namespace arm64
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_
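A rough usage sketch of this helper from a JIT code generator: literals are created and deduplicated while code is being emitted, and the patches are applied once the final code buffer and roots table exist. The accessors, register choice, and variable names below are illustrative assumptions, not call sites taken from this change.

  // Sketch only; assumes an enclosing arm64 code generator with the usual accessors.
  JitPatchesARM64 jit_patches(GetAssembler(), GetGraph()->GetAllocator());

  // During code generation: reserve a root and get a placeholder literal (value 0).
  vixl::aarch64::Literal<uint32_t>* lit = jit_patches.DeduplicateJitStringLiteral(
      dex_file, string_index, string_handle, GetCodeGenerationData());
  GetVIXLAssembler()->Ldr(vixl::aarch64::w0, lit);  // Register chosen arbitrarily.

  // After the code and the JIT roots table have been committed:
  jit_patches.EmitJitRootPatches(code_ptr, roots_data, *GetCodeGenerationData());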
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 01daa23511..6f4f2b6cf6 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -55,6 +55,7 @@ void LinearizeTest::TestCode(const std::vector<uint16_t>& data,
}
TEST_F(LinearizeTest, CFG1) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -80,6 +81,7 @@ TEST_F(LinearizeTest, CFG1) {
}
TEST_F(LinearizeTest, CFG2) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -105,6 +107,7 @@ TEST_F(LinearizeTest, CFG2) {
}
TEST_F(LinearizeTest, CFG3) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -132,6 +135,7 @@ TEST_F(LinearizeTest, CFG3) {
}
TEST_F(LinearizeTest, CFG4) {
+ TEST_DISABLED_FOR_RISCV64();
/* Structure of this graph (+ are back edges)
// Block0
// |
@@ -162,6 +166,7 @@ TEST_F(LinearizeTest, CFG4) {
}
TEST_F(LinearizeTest, CFG5) {
+ TEST_DISABLED_FOR_RISCV64();
/* Structure of this graph (+ are back edges)
// Block0
// |
@@ -192,6 +197,7 @@ TEST_F(LinearizeTest, CFG5) {
}
TEST_F(LinearizeTest, CFG6) {
+ TEST_DISABLED_FOR_RISCV64();
// Block0
// |
// Block1
@@ -218,6 +224,7 @@ TEST_F(LinearizeTest, CFG6) {
}
TEST_F(LinearizeTest, CFG7) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index fb1a23eef4..7e488ba41d 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -47,6 +47,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) {
}
TEST_F(LiveRangesTest, CFG1) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* return 0;
@@ -81,6 +82,7 @@ TEST_F(LiveRangesTest, CFG1) {
}
TEST_F(LiveRangesTest, CFG2) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -125,6 +127,7 @@ TEST_F(LiveRangesTest, CFG2) {
}
TEST_F(LiveRangesTest, CFG3) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -194,6 +197,7 @@ TEST_F(LiveRangesTest, CFG3) {
}
TEST_F(LiveRangesTest, Loop1) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -270,6 +274,7 @@ TEST_F(LiveRangesTest, Loop1) {
}
TEST_F(LiveRangesTest, Loop2) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -341,6 +346,7 @@ TEST_F(LiveRangesTest, Loop2) {
}
TEST_F(LiveRangesTest, CFG4) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 0b421cf9e6..6af07aea4e 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -70,6 +70,7 @@ void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expec
}
TEST_F(LivenessTest, CFG1) {
+ TEST_DISABLED_FOR_RISCV64();
const char* expected =
"Block 0\n"
" live in: (0)\n"
@@ -93,6 +94,7 @@ TEST_F(LivenessTest, CFG1) {
}
TEST_F(LivenessTest, CFG2) {
+ TEST_DISABLED_FOR_RISCV64();
const char* expected =
"Block 0\n"
" live in: (0)\n"
@@ -115,6 +117,7 @@ TEST_F(LivenessTest, CFG2) {
}
TEST_F(LivenessTest, CFG3) {
+ TEST_DISABLED_FOR_RISCV64();
const char* expected =
"Block 0\n" // entry block
" live in: (000)\n"
@@ -144,6 +147,7 @@ TEST_F(LivenessTest, CFG3) {
}
TEST_F(LivenessTest, CFG4) {
+ TEST_DISABLED_FOR_RISCV64();
// var a;
// if (0 == 0) {
// a = 5;
@@ -192,6 +196,7 @@ TEST_F(LivenessTest, CFG4) {
}
TEST_F(LivenessTest, CFG5) {
+ TEST_DISABLED_FOR_RISCV64();
// var a = 0;
// if (0 == 0) {
// } else {
@@ -237,6 +242,7 @@ TEST_F(LivenessTest, CFG5) {
}
TEST_F(LivenessTest, Loop1) {
+ TEST_DISABLED_FOR_RISCV64();
// Simple loop with one preheader and one back edge.
// var a = 0;
// while (a == a) {
@@ -283,6 +289,7 @@ TEST_F(LivenessTest, Loop1) {
}
TEST_F(LivenessTest, Loop3) {
+ TEST_DISABLED_FOR_RISCV64();
// Test that the returned value stays live in a preceding loop.
// var a = 0;
// while (a == a) {
@@ -330,6 +337,7 @@ TEST_F(LivenessTest, Loop3) {
TEST_F(LivenessTest, Loop4) {
+ TEST_DISABLED_FOR_RISCV64();
// Make sure we support a preheader of a loop not being the first predecessor
// in the predecessor list of the header.
// var a = 0;
@@ -382,6 +390,7 @@ TEST_F(LivenessTest, Loop4) {
}
TEST_F(LivenessTest, Loop5) {
+ TEST_DISABLED_FOR_RISCV64();
// Make sure we create a preheader of a loop when a header originally has two
// incoming blocks and one back edge.
// Bitsets are made of:
@@ -438,6 +447,7 @@ TEST_F(LivenessTest, Loop5) {
}
TEST_F(LivenessTest, Loop6) {
+ TEST_DISABLED_FOR_RISCV64();
// Bitsets are made of:
// (constant0, constant4, constant5, phi in block 2)
const char* expected =
@@ -489,6 +499,7 @@ TEST_F(LivenessTest, Loop6) {
TEST_F(LivenessTest, Loop7) {
+ TEST_DISABLED_FOR_RISCV64();
// Bitsets are made of:
// (constant0, constant4, constant5, phi in block 2, phi in block 6)
const char* expected =
@@ -543,6 +554,7 @@ TEST_F(LivenessTest, Loop7) {
}
TEST_F(LivenessTest, Loop8) {
+ TEST_DISABLED_FOR_RISCV64();
// var a = 0;
// while (a == a) {
// a = a + a;
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index f1c50ac03c..75000c8b91 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -41,7 +41,7 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1,
// We currently only support Add and Sub operations.
return true;
}
- if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != idx2) {
+ if (idx1->GetLeastConstantLeft() != idx2) {
// Cannot analyze [i+CONST1] and [j].
return true;
}
@@ -51,9 +51,9 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1,
// Since 'i' are the same in [i+CONST] and [i],
// further compare [CONST] and [0].
- int64_t l1 = idx1->IsAdd() ?
- idx1->GetConstantRight()->AsIntConstant()->GetValue() :
- -idx1->GetConstantRight()->AsIntConstant()->GetValue();
+ int64_t l1 = idx1->IsAdd()
+ ? idx1->GetConstantRight()->AsIntConstant()->GetValue()
+ : -idx1->GetConstantRight()->AsIntConstant()->GetValue();
int64_t l2 = 0;
int64_t h1 = l1 + (vector_length1 - 1);
int64_t h2 = l2 + (vector_length2 - 1);
@@ -68,8 +68,7 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1,
// We currently only support Add and Sub operations.
return true;
}
- if (idx1->AsBinaryOperation()->GetLeastConstantLeft() !=
- idx2->AsBinaryOperation()->GetLeastConstantLeft()) {
+ if (idx1->GetLeastConstantLeft() != idx2->GetLeastConstantLeft()) {
// Cannot analyze [i+CONST1] and [j+CONST2].
return true;
}
@@ -80,12 +79,12 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1,
// Since 'i' are the same in [i+CONST1] and [i+CONST2],
// further compare [CONST1] and [CONST2].
- int64_t l1 = idx1->IsAdd() ?
- idx1->GetConstantRight()->AsIntConstant()->GetValue() :
- -idx1->GetConstantRight()->AsIntConstant()->GetValue();
- int64_t l2 = idx2->IsAdd() ?
- idx2->GetConstantRight()->AsIntConstant()->GetValue() :
- -idx2->GetConstantRight()->AsIntConstant()->GetValue();
+ int64_t l1 = idx1->IsAdd()
+ ? idx1->GetConstantRight()->AsIntConstant()->GetValue()
+ : -idx1->GetConstantRight()->AsIntConstant()->GetValue();
+ int64_t l2 = idx2->IsAdd()
+ ? idx2->GetConstantRight()->AsIntConstant()->GetValue()
+ : -idx2->GetConstantRight()->AsIntConstant()->GetValue();
int64_t h1 = l1 + (vector_length1 - 1);
int64_t h2 = l2 + (vector_length2 - 1);
return CanIntegerRangesOverlap(l1, h1, l2, h2);
@@ -269,6 +268,13 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1,
}
bool LoadStoreAnalysis::Run() {
+ // Currently load_store analysis can't handle predicated load/stores; specifically pairs of
+ // memory operations with different predicates.
+ // TODO: support predicated SIMD.
+ if (graph_->HasPredicatedSIMD()) {
+ return false;
+ }
+
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
heap_location_collector_.VisitBasicBlock(block);
}
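The reason for the predicated-SIMD bail-out above can be seen with a small example: two vector memory operations can name the same array and index yet touch different lanes when their governing predicates differ, so heap-location equality alone no longer implies the accesses see the same memory.

  // Pseudo-IR, for illustration only:
  //   VecStore a[i], v   (predicate p1)   // writes only the lanes enabled by p1
  //   VecLoad  a[i]      (predicate p2)   // reads  only the lanes enabled by p2
  // If p1 != p2, forwarding v to the load (or eliminating either access) would be
  // unsound, so the analysis currently refuses graphs that contain predicated SIMD.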
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index c46a5b9cc1..ee425454a0 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -610,6 +610,7 @@ class HeapLocationCollector : public HGraphVisitor {
}
void VisitVecLoad(HVecLoad* instruction) override {
+ DCHECK(!instruction->IsPredicated());
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
@@ -618,6 +619,7 @@ class HeapLocationCollector : public HGraphVisitor {
}
void VisitVecStore(HVecStore* instruction) override {
+ DCHECK(!instruction->IsPredicated());
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index 865febbd31..8c6812f184 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -897,7 +897,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1007,7 +1007,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1131,7 +1131,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1412,7 +1412,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1514,7 +1514,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1626,7 +1626,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1653,7 +1653,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
graph_->GetDexFile(),
0);
HInstruction* goto_right = new (GetAllocator()) HGoto();
- call_right->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_right->SetRawInputAt(0, new_inst);
right->AddInstruction(write_right);
right->AddInstruction(call_right);
right->AddInstruction(goto_right);
@@ -1813,7 +1813,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
high_left->AddInstruction(call_left);
high_left->AddInstruction(goto_left);
@@ -1870,7 +1870,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_low_left = new (GetAllocator()) HGoto();
- call_low_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_low_left->SetRawInputAt(0, new_inst);
low_left->AddInstruction(call_low_left);
low_left->AddInstruction(goto_low_left);
@@ -2030,7 +2030,7 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) {
HInstruction* goto_left_merge = new (GetAllocator()) HGoto();
left_phi->SetRawInputAt(0, obj_param);
left_phi->SetRawInputAt(1, new_inst);
- call_left->AsInvoke()->SetRawInputAt(0, left_phi);
+ call_left->SetRawInputAt(0, left_phi);
left_merge->AddPhi(left_phi);
left_merge->AddInstruction(call_left);
left_merge->AddInstruction(goto_left_merge);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9cabb12a9f..58fdd1cd05 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -1080,10 +1080,12 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitVecLoad(HVecLoad* instruction) override {
+ DCHECK(!instruction->IsPredicated());
VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
}
void VisitVecStore(HVecStore* instruction) override {
+ DCHECK(!instruction->IsPredicated());
size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
VisitSetLocation(instruction, idx, instruction->GetValue());
}
@@ -4041,6 +4043,13 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) {
return false;
}
+ // Currently load_store analysis can't handle predicated load/stores; specifically pairs of
+ // memory operations with different predicates.
+ // TODO: support predicated SIMD.
+ if (graph_->HasPredicatedSIMD()) {
+ return false;
+ }
+
std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper(
graph_, heap_location_collector, enable_partial_lse, stats_));
lse_visitor->Run();
diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc
index 1ee109980f..d3cf8bfa2a 100644
--- a/compiler/optimizing/load_store_elimination_test.cc
+++ b/compiler/optimizing/load_store_elimination_test.cc
@@ -573,7 +573,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) {
AddVecStore(entry_block_, array_, j_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -589,7 +590,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) {
AddVecStore(entry_block_, array_, i_add1_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -634,7 +636,8 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) {
AddArraySet(entry_block_, array_, i_, c1);
HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(load1));
@@ -668,7 +671,8 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) {
// a[j] = 1;
HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(array_set));
@@ -701,12 +705,13 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) {
// b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
AddVecStore(loop_, array_, phi_);
HInstruction* vload = AddVecLoad(loop_, array_, phi_);
- AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+ AddVecStore(loop_, array_b, phi_, vload);
// a[j] = 0;
HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -740,12 +745,13 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) {
// b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
AddVecStore(loop_, array_, phi_);
HInstruction* vload = AddVecLoad(loop_, array_, phi_);
- AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+ AddVecStore(loop_, array_b, phi_, vload);
// x = a[j];
HInstruction* load = AddArrayGet(return_block_, array_, j_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -786,7 +792,8 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) {
// down: a[i,... i + 3] = [1,...1]
HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vstore2));
@@ -874,10 +881,11 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) {
// a[i,... i + 3] = [1,...1]
HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_);
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
- HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+ HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload);
HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2));
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore1));
@@ -963,9 +971,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects)
// v = a[i,... i + 3]
// array[0,... 3] = v
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -987,9 +996,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) {
// v = a[0,... 3]
// array[0,... 3] = v
HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1063,10 +1073,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE
// array[0] = v1
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
HInstruction* load = AddArrayGet(loop_, array_a, phi_);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1094,10 +1105,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) {
// array[0] = v1
HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
HInstruction* load = AddArrayGet(pre_header_, array_a, c0);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1126,10 +1138,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide
// array[128,... 131] = v1
HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_);
HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_);
- HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
- HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1);
+ HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -1157,10 +1170,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) {
// array[128,... 131] = v1
HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0);
HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0);
- HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
- HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1);
+ HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -2139,9 +2153,9 @@ TEST_F(LoadStoreEliminationTest, PartialLoadElimination) {
right->AddInstruction(read_right);
right->AddInstruction(goto_right);
- HInstruction* phi_final = MakePhi({read_left, read_right});
+ HPhi* phi_final = MakePhi({read_left, read_right});
HInstruction* return_exit = new (GetAllocator()) HReturn(phi_final);
- exit->AddPhi(phi_final->AsPhi());
+ exit->AddPhi(phi_final);
exit->AddInstruction(return_exit);
// PerformLSE expects this to be empty.
@@ -5153,7 +5167,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) {
CheckFinalInstruction(if_merge->InputAt(0), ComparisonPlacement::kAfterEscape);
EXPECT_INS_EQ(init_set->InputAt(1), c3);
ASSERT_TRUE(write_partial->InputAt(0)->IsPhi());
- EXPECT_INS_EQ(write_partial->InputAt(0)->AsPhi()->InputAt(0), init_set->InputAt(0));
+ EXPECT_INS_EQ(write_partial->InputAt(0)->InputAt(0), init_set->InputAt(0));
EXPECT_INS_EQ(write_partial->InputAt(1), c4);
EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return);
@@ -5225,14 +5239,14 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) {
HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
- HInstruction* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_);
+ HIf* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_);
left->AddInstruction(call_left);
cmp_instructions.AddSetup(left);
left->AddInstruction(cmp_instructions.cmp_);
left->AddInstruction(if_left);
call_left->CopyEnvironmentFrom(cls->GetEnvironment());
cmp_instructions.AddEnvironment(cls->GetEnvironment());
- if (if_left->AsIf()->IfTrueSuccessor() != partial) {
+ if (if_left->IfTrueSuccessor() != partial) {
left->SwapSuccessors();
}
@@ -5381,7 +5395,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) {
right->AddInstruction(write_right);
right->AddInstruction(goto_right);
- HInstruction* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32));
+ HInstanceFieldSet* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32));
HInstruction* return_exit = new (GetAllocator()) HReturnVoid();
breturn->AddInstruction(write_bottom);
breturn->AddInstruction(return_exit);
@@ -5391,7 +5405,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) {
PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(write_bottom);
- EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet());
+ EXPECT_TRUE(write_bottom->GetIsPredicatedSet());
EXPECT_INS_REMOVED(write_right);
EXPECT_INS_RETAINED(call_left);
HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn);
@@ -5491,7 +5505,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) {
non_escape->AddInstruction(non_escape_goto);
non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment());
- HInstruction* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32));
+ HInstanceFieldSet* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32));
HInstruction* return_exit = new (GetAllocator()) HReturnVoid();
breturn->AddInstruction(write_bottom);
breturn->AddInstruction(return_exit);
@@ -5501,7 +5515,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) {
PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(write_bottom);
- EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom;
+ EXPECT_TRUE(write_bottom->GetIsPredicatedSet()) << *write_bottom;
EXPECT_INS_REMOVED(write_right);
EXPECT_INS_RETAINED(call_left);
HInstanceFieldSet* pred_set = FindSingleInstruction<HInstanceFieldSet>(graph_, breturn);
@@ -6786,14 +6800,14 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
+ HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull();
ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
EXPECT_INS_EQ(inst_return_phi->InputAt(0),
FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor()));
EXPECT_INS_EQ(inst_return_phi->InputAt(1),
FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor()));
EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant());
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
+ HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull();
ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0));
EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0));
@@ -6966,14 +6980,14 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
+ HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull();
ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
EXPECT_INS_EQ(inst_return_phi->InputAt(0),
FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor()));
EXPECT_INS_EQ(inst_return_phi->InputAt(1),
FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor()));
EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant());
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
+ HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull();
ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0));
EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0));
@@ -7113,12 +7127,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
+ HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull();
ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant());
EXPECT_INS_EQ(inst_return_phi->InputAt(1),
FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
+ HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull();
ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
@@ -7213,7 +7227,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
HInstruction* goto_no_escape = new (GetAllocator()) HGoto();
no_escape->AddInstruction(goto_no_escape);
- HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32));
+ HInstanceFieldSet* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32));
HInstruction* goto_preheader = new (GetAllocator()) HGoto();
loop_pre_header->AddInstruction(write_pre_header);
loop_pre_header->AddInstruction(goto_preheader);
@@ -7236,7 +7250,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
loop_if_left->AddInstruction(goto_loop_left);
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32));
+ HInstanceFieldSet* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32));
HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
loop_if_right->AddInstruction(write_loop_right);
loop_if_right->AddInstruction(goto_loop_right);
@@ -7257,12 +7271,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
+ HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull();
ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant());
EXPECT_INS_EQ(inst_return_phi->InputAt(1),
FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
+ HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull();
ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
@@ -7272,9 +7286,9 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi);
EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5);
EXPECT_INS_RETAINED(write_loop_right) << *write_loop_right;
- EXPECT_TRUE(write_loop_right->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_loop_right;
+ EXPECT_TRUE(write_loop_right->GetIsPredicatedSet()) << *write_loop_right;
EXPECT_INS_RETAINED(write_pre_header) << *write_pre_header;
- EXPECT_TRUE(write_pre_header->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_pre_header;
+ EXPECT_TRUE(write_pre_header->GetIsPredicatedSet()) << *write_pre_header;
}
// // ENTRY
@@ -7401,12 +7415,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
+ HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull();
ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant());
EXPECT_INS_EQ(inst_return_phi->InputAt(1),
FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
+ HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull();
ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
@@ -7562,7 +7576,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
+ HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull();
ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
EXPECT_INS_EQ(inst_return_phi->InputAt(0),
FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
@@ -8268,13 +8282,13 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle);
ASSERT_NE(replacement_middle_read, nullptr);
ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull);
+ ASSERT_EQ(2u, replacement_middle_read->GetTarget()->InputCount());
+ ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(0), replacement_new_inst);
+ ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(1), cnull);
ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0);
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11);
+ ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->InputCount());
+ ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(0), c0);
+ ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(1), c11);
EXPECT_INS_RETAINED(left2_write);
ASSERT_TRUE(left2_write->GetIsPredicatedSet());
@@ -8285,9 +8299,9 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) {
ASSERT_NE(replacement_breturn_read, nullptr);
ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget());
ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), c33);
- HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1);
+ ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->InputCount());
+ ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(0), c33);
+ HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->InputAt(1);
ASSERT_NE(other_input->GetBlock(), nullptr) << GetParam();
ASSERT_INS_EQ(other_input, replacement_middle_read);
}
@@ -8423,13 +8437,13 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) {
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle);
ASSERT_NE(replacement_middle_read, nullptr);
ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull);
+ ASSERT_EQ(2u, replacement_middle_read->GetTarget()->InputCount());
+ ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(0), replacement_new_inst);
+ ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(1), cnull);
ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0);
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11);
+ ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->InputCount());
+ ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(0), c0);
+ ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(1), c11);
EXPECT_INS_RETAINED(left2_call);
@@ -8627,13 +8641,13 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3)
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle1);
ASSERT_NE(replacement_middle1_read, nullptr);
ASSERT_TRUE(replacement_middle1_read->GetTarget()->IsPhi());
- ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(1), cnull);
+ ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->InputCount());
+ ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->InputAt(0), replacement_new_inst);
+ ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->InputAt(1), cnull);
ASSERT_TRUE(replacement_middle1_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(0), c0);
- ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(1), c11);
+ ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->InputCount());
+ ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->InputAt(0), c0);
+ ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->InputAt(1), c11);
EXPECT_INS_RETAINED(left2_call);
@@ -8652,11 +8666,10 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3)
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
ASSERT_NE(replacement_breturn_read, nullptr);
ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle1_read->GetTarget());
- ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0),
- replacement_left3_read);
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1),
- replacement_middle1_read);
+ ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi());
+ ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->InputCount());
+ ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(0), replacement_left3_read);
+ ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(1), replacement_middle1_read);
EXPECT_INS_RETAINED(breturn_add1);
ASSERT_INS_EQ(breturn_add1->InputAt(0), replacement_middle1_read);
ASSERT_INS_EQ(breturn_add1->InputAt(1), replacement_breturn_read);
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index f40b7f4f0c..4189bc4053 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -62,7 +62,7 @@ Location Location::RegisterOrConstant(HInstruction* instruction) {
}
Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
- HConstant* constant = instruction->AsConstant();
+ HConstant* constant = instruction->AsConstantOrNull();
if (constant != nullptr) {
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
if (IsInt<32>(value)) {
@@ -73,7 +73,7 @@ Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
}
Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
- HConstant* constant = instruction->AsConstant();
+ HConstant* constant = instruction->AsConstantOrNull();
if (constant != nullptr) {
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
if (IsInt<32>(value)) {
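The two hunks above follow a wider rename in which the null-returning downcast gets an explicit OrNull suffix. Below is a minimal sketch of the assumed pattern, using invented names (HInstructionSketch and friends), purely for illustration and not taken from this patch:

// Assumed shape of the As*/As*OrNull split: the OrNull variant performs a
// checked downcast and may return nullptr, which is why call sites such as
// RegisterOrInt32Constant() keep their explicit nullptr test.
class HInstructionSketch {
 public:
  virtual ~HInstructionSketch() = default;
  virtual bool IsConstant() const { return false; }
};

class HConstantSketch : public HInstructionSketch {
 public:
  bool IsConstant() const override { return true; }
};

HConstantSketch* AsConstantOrNullSketch(HInstructionSketch* instruction) {
  return instruction->IsConstant() ? static_cast<HConstantSketch*>(instruction) : nullptr;
}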
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index 95e81533da..6163624a97 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -42,7 +42,7 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
// not cause loop peeling to happen as they either cannot be inside a loop, or by
// definition cannot be loop exits (unconditional instructions), or are not beneficial for
// the optimization.
- HIf* hif = block->GetLastInstruction()->AsIf();
+ HIf* hif = block->GetLastInstruction()->AsIfOrNull();
if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
analysis_results->invariant_exits_num_++;
}
@@ -259,7 +259,7 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper {
case HInstruction::InstructionKind::kVecReplicateScalar:
return 2;
case HInstruction::InstructionKind::kVecExtractScalar:
- return 1;
+ return 1;
case HInstruction::InstructionKind::kVecReduce:
return 4;
case HInstruction::InstructionKind::kVecNeg:
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index cec00fecf4..cd8f00588d 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -148,13 +148,15 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
//
// Returns 'true' by default, should be overridden by particular target loop helper.
virtual bool IsLoopNonBeneficialForScalarOpts(
- LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
+ [[maybe_unused]] LoopAnalysisInfo* loop_analysis_info) const {
+ return true;
+ }
// Returns optimal scalar unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
virtual uint32_t GetScalarUnrollingFactor(
- const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ [[maybe_unused]] const LoopAnalysisInfo* analysis_info) const {
return LoopAnalysisInfo::kNoUnrollingFactor;
}
@@ -166,17 +168,17 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
// Returns whether it is beneficial to fully unroll the loop.
//
// Returns 'false' by default, should be overridden by particular target loop helper.
- virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ virtual bool IsFullUnrollingBeneficial([[maybe_unused]] LoopAnalysisInfo* analysis_info) const {
return false;
}
// Returns optimal SIMD unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
- virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED,
- int64_t trip_count ATTRIBUTE_UNUSED,
- uint32_t max_peel ATTRIBUTE_UNUSED,
- uint32_t vector_length ATTRIBUTE_UNUSED) const {
+ virtual uint32_t GetSIMDUnrollingFactor([[maybe_unused]] HBasicBlock* block,
+ [[maybe_unused]] int64_t trip_count,
+ [[maybe_unused]] uint32_t max_peel,
+ [[maybe_unused]] uint32_t vector_length) const {
return LoopAnalysisInfo::kNoUnrollingFactor;
}
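The hunks above mechanically switch from the project-specific ATTRIBUTE_UNUSED macro to the standard C++17 attribute. A minimal sketch of the resulting style, with a hypothetical helper class invented only for illustration:

#include <cstdint>

// [[maybe_unused]] on the parameters of an overridable default implementation
// silences unused-parameter warnings without any project-specific macro.
class ArchLoopHelperSketch {
 public:
  virtual ~ArchLoopHelperSketch() = default;
  virtual uint32_t GetSIMDUnrollingFactor([[maybe_unused]] int64_t trip_count,
                                          [[maybe_unused]] uint32_t vector_length) const {
    return 1;  // default: no unrolling
  }
};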
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 7a52502562..f6d69ca789 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -366,8 +366,8 @@ static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
}
-// Insert an instruction.
-static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+// Insert an instruction at the end of the block, with safety checks.
+inline HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
DCHECK(block != nullptr);
DCHECK(instruction != nullptr);
block->InsertInstructionBefore(instruction, block->GetLastInstruction());
@@ -418,7 +418,7 @@ static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) {
++it;
if (true_succ->Dominates(user_block)) {
user->ReplaceInput(graph->GetIntConstant(1), index);
- } else if (false_succ->Dominates(user_block)) {
+ } else if (false_succ->Dominates(user_block)) {
user->ReplaceInput(graph->GetIntConstant(0), index);
}
}
@@ -453,6 +453,54 @@ static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) {
return type;
}
+// Returns whether the loop is of a diamond structure:
+//
+//             header <-----------------+
+//               |                       |
+//          diamond_hif                  |
+//            /     \                    |
+//  diamond_true   diamond_false         |
+//            \     /                    |
+//           back_edge                   |
+//               |                       |
+//               +-----------------------+
+static bool HasLoopDiamondStructure(HLoopInformation* loop_info) {
+ HBasicBlock* header = loop_info->GetHeader();
+ if (loop_info->NumberOfBackEdges() != 1 || header->GetSuccessors().size() != 2) {
+ return false;
+ }
+ HBasicBlock* header_succ_0 = header->GetSuccessors()[0];
+ HBasicBlock* header_succ_1 = header->GetSuccessors()[1];
+ HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ?
+ header_succ_0 :
+ header_succ_1;
+ if (!diamond_top->GetLastInstruction()->IsIf()) {
+ return false;
+ }
+
+ HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf();
+ HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor();
+ HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor();
+
+ if (diamond_true->GetSuccessors().size() != 1 || diamond_false->GetSuccessors().size() != 1) {
+ return false;
+ }
+
+ HBasicBlock* back_edge = diamond_true->GetSingleSuccessor();
+ if (back_edge != diamond_false->GetSingleSuccessor() ||
+ back_edge != loop_info->GetBackEdges()[0]) {
+ return false;
+ }
+
+ DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 5u);
+ return true;
+}
+
+static bool IsPredicatedLoopControlFlowSupported(HLoopInformation* loop_info) {
+ size_t num_of_blocks = loop_info->GetBlocks().NumSetBits();
+ return num_of_blocks == 2 || HasLoopDiamondStructure(loop_info);
+}
+
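For orientation, here is a hypothetical source loop (not taken from this patch) whose CFG has the diamond shape that HasLoopDiamondStructure() accepts:

// Five blocks in total: the loop header (exit test), the block ending in
// diamond_hif, the two arms, and the block where the arms merge, which is the
// single back edge.
void DiamondLoopSketch(int* a, const int* b, int n) {
  for (int i = 0; i < n; ++i) {  // header: loop exit test
    if (b[i] == 0) {             // diamond_hif in the header's in-loop successor
      a[i] += 1;                 // diamond_true
    } else {
      a[i] -= 1;                 // diamond_false
    }
  }                              // merge point: the single back edge
}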
//
// Public methods.
//
@@ -482,6 +530,8 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_runtime_test_b_(nullptr),
vector_map_(nullptr),
vector_permanent_map_(nullptr),
+ vector_external_set_(nullptr),
+ predicate_info_map_(nullptr),
vector_mode_(kSequential),
vector_preheader_(nullptr),
vector_header_(nullptr),
@@ -542,12 +592,17 @@ bool HLoopOptimization::LocalRun() {
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ScopedArenaSafeMap<HInstruction*, HInstruction*> perm(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*> pred(
+ std::less<HBasicBlock*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
vector_permanent_map_ = &perm;
+ vector_external_set_ = &ext_set;
+ predicate_info_map_ = &pred;
// Traverse.
const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -556,6 +611,9 @@ bool HLoopOptimization::LocalRun() {
vector_refs_ = nullptr;
vector_map_ = nullptr;
vector_permanent_map_ = nullptr;
+ vector_external_set_ = nullptr;
+ predicate_info_map_ = nullptr;
+
return did_loop_opt;
}
@@ -787,6 +845,37 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
+// Checks whether the loop has an exit structure suitable for the InnerLoopFinite optimization:
+// - it has a single loop exit.
+// - the exit block has only a single predecessor - a block inside the loop.
+//
+// In that case returns the single exit basic block (outside the loop); otherwise nullptr.
+static HBasicBlock* GetInnerLoopFiniteSingleExit(HLoopInformation* loop_info) {
+ HBasicBlock* exit = nullptr;
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
+
+ // Check whether one of the successors is a loop exit.
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!loop_info->Contains(*successor)) {
+ if (exit != nullptr) {
+ // The loop has more than one exit.
+ return nullptr;
+ }
+ exit = successor;
+
+ // Ensure exit can only be reached by exiting loop.
+ if (successor->GetPredecessors().size() != 1) {
+ return nullptr;
+ }
+ }
+ }
+ }
+ return exit;
+}
+
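Two hypothetical loops (illustrative only, assuming the usual CFG shapes built for them) show what the new exit check accepts and rejects:

// GetInnerLoopFiniteSingleExit() accepts a loop whose only exit is the header
// test; an early "break" would typically add a second exiting edge or a second
// predecessor of the exit block, so the helper returns nullptr.
int SumAllSketch(const int* a, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    sum += a[i];                 // single exit: the header condition
  }
  return sum;
}

int SumUntilNegativeSketch(const int* a, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    if (a[i] < 0) break;         // extra loop exit: expected to be rejected
    sum += a[i];
  }
  return sum;
}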
bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -795,33 +884,22 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
return false;
}
- // Ensure there is only a single loop-body (besides the header).
- HBasicBlock* body = nullptr;
- for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
- if (it.Current() != header) {
- if (body != nullptr) {
- return false;
- }
- body = it.Current();
- }
- }
- CHECK(body != nullptr);
- // Ensure there is only a single exit point.
- if (header->GetSuccessors().size() != 2) {
- return false;
- }
- HBasicBlock* exit = (header->GetSuccessors()[0] == body)
- ? header->GetSuccessors()[1]
- : header->GetSuccessors()[0];
- // Ensure exit can only be reached by exiting loop.
- if (exit->GetPredecessors().size() != 1) {
+ // Check loop exits.
+ HBasicBlock* exit = GetInnerLoopFiniteSingleExit(node->loop_info);
+ if (exit == nullptr) {
return false;
}
+
+ HBasicBlock* body = (header->GetSuccessors()[0] == exit)
+ ? header->GetSuccessors()[1]
+ : header->GetSuccessors()[0];
// Detect either an empty loop (no side effects other than plain iteration) or
// a trivial loop (just iterating once). Replace subsequent index uses, if any,
// with the last value and remove the loop, possibly after unrolling its body.
HPhi* main_phi = nullptr;
- if (TrySetSimpleLoopHeader(header, &main_phi)) {
+ size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits();
+
+ if (num_of_blocks == 2 && TrySetSimpleLoopHeader(header, &main_phi)) {
bool is_empty = IsEmptyBody(body);
if (reductions_->empty() && // TODO: possible with some effort
(is_empty || trip_count == 1) &&
@@ -845,21 +923,61 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
}
// Vectorize loop, if possible and valid.
- if (kEnableVectorization &&
+ if (!kEnableVectorization ||
// Disable vectorization for debuggable graphs: this is a workaround for the bug
// in 'GenerateNewLoop' which caused the SuspendCheck environment to be invalid.
// TODO: b/138601207, investigate other possible cases with wrong environment values and
// possibly switch back vectorization on for debuggable graphs.
- !graph_->IsDebuggable() &&
- TrySetSimpleLoopHeader(header, &main_phi) &&
- ShouldVectorize(node, body, trip_count) &&
- TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
- Vectorize(node, body, exit, trip_count);
- graph_->SetHasSIMD(true); // flag SIMD usage
- MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
- return true;
+ graph_->IsDebuggable()) {
+ return false;
+ }
+
+ if (IsInPredicatedVectorizationMode()) {
+ return TryVectorizePredicated(node, body, exit, main_phi, trip_count);
+ } else {
+ return TryVectorizedTraditional(node, body, exit, main_phi, trip_count);
}
- return false;
+}
+
+bool HLoopOptimization::TryVectorizePredicated(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count) {
+ if (!IsPredicatedLoopControlFlowSupported(node->loop_info) ||
+ !ShouldVectorizeCommon(node, main_phi, trip_count)) {
+ return false;
+ }
+
+ // Currently we can only generate cleanup loops for loops with 2 basic blocks.
+ //
+ // TODO: Support array disambiguation tests for CF loops.
+ if (NeedsArrayRefsDisambiguationTest() &&
+ node->loop_info->GetBlocks().NumSetBits() != 2) {
+ return false;
+ }
+
+ VectorizePredicated(node, body, exit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
+ graph_->SetHasPredicatedSIMD(true); // flag SIMD usage
+ return true;
+}
+
+bool HLoopOptimization::TryVectorizedTraditional(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits();
+
+ if (num_of_blocks != 2 || !ShouldVectorizeCommon(node, main_phi, trip_count)) {
+ return false;
+ }
+ VectorizeTraditional(node, body, exit, trip_count);
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
+ graph_->SetHasTraditionalSIMD(true); // flag SIMD usage
+ return true;
}
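A rough scalar model of the two strategies selected above (the lane handling is an assumption made for illustration; this is not ART code):

// Traditional SIMD executes whole vectors and leaves the remainder to a scalar
// cleanup loop; predicated SIMD runs every vector iteration under a governing
// predicate, so the final partial iteration just masks off the excess lanes.
void TraditionalStyleSketch(int* a, int n) {
  int i = 0;
  for (; i + 4 <= n; i += 4) {
    for (int lane = 0; lane < 4; ++lane) a[i + lane] += 1;  // full-width vector body
  }
  for (; i < n; ++i) a[i] += 1;                             // scalar cleanup loop
}

void PredicatedStyleSketch(int* a, int n) {
  for (int i = 0; i < n; i += 4) {
    for (int lane = 0; lane < 4; ++lane) {
      if (i + lane < n) a[i + lane] += 1;  // lane disabled by the governing predicate
    }
  }
}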
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
@@ -1006,7 +1124,10 @@ bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) {
// Intel Press, June, 2004 (http://www.aartbik.com/).
//
-bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
+
+bool HLoopOptimization::CanVectorizeDataFlow(LoopNode* node,
+ HBasicBlock* header,
+ bool collect_alignment_info) {
// Reset vector bookkeeping.
vector_length_ = 0;
vector_refs_->clear();
@@ -1015,16 +1136,30 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
vector_runtime_test_a_ =
vector_runtime_test_b_ = nullptr;
- // Phis in the loop-body prevent vectorization.
- if (!block->GetPhis().IsEmpty()) {
- return false;
- }
+ // Traverse the data flow of the loop, in the original program order.
+ for (HBlocksInLoopReversePostOrderIterator block_it(*header->GetLoopInformation());
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
- // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
- // occurrence, which allows passing down attributes down the use tree.
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
- return false; // failure to vectorize a left-hand-side
+ if (block == header) {
+ // The header has a known simple structure (checked by TrySetSimpleLoopHeader) and doesn't
+ // need to be processed here.
+ continue;
+ }
+
+ // Phis in the loop-body prevent vectorization.
+ // TODO: Enable vectorization of CF loops with Phis.
+ if (!block->GetPhis().IsEmpty()) {
+ return false;
+ }
+
+ // Scan the loop-body instructions, starting a right-hand-side tree traversal at each
+ // left-hand-side occurrence, which allows passing attributes down the use tree.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
+ return false; // failure to vectorize a left-hand-side
+ }
}
}
@@ -1111,24 +1246,123 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
}
} // for i
- if (!IsInPredicatedVectorizationMode()) {
- // Find a suitable alignment strategy.
+ if (collect_alignment_info) {
+ // Update the info on alignment strategy.
SetAlignmentStrategy(peeling_votes, peeling_candidate);
}
- // Does vectorization seem profitable?
- if (!IsVectorizationProfitable(trip_count)) {
+ // Success!
+ return true;
+}
+
+bool HLoopOptimization::ShouldVectorizeCommon(LoopNode* node,
+ HPhi* main_phi,
+ int64_t trip_count) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+ bool enable_alignment_strategies = !IsInPredicatedVectorizationMode();
+ if (!TrySetSimpleLoopHeader(header, &main_phi) ||
+ !CanVectorizeDataFlow(node, header, enable_alignment_strategies) ||
+ !IsVectorizationProfitable(trip_count) ||
+ !TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
return false;
}
- // Success!
return true;
}
-void HLoopOptimization::Vectorize(LoopNode* node,
- HBasicBlock* block,
- HBasicBlock* exit,
- int64_t trip_count) {
+void HLoopOptimization::VectorizePredicated(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit) {
+ DCHECK(IsInPredicatedVectorizationMode());
+
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+ // Adjust vector bookkeeping.
+ HPhi* main_phi = nullptr;
+ bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets
+ DCHECK(is_simple_loop_header);
+ vector_header_ = header;
+ vector_body_ = block;
+
+ // Loop induction type.
+ DataType::Type induc_type = main_phi->GetType();
+ DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64)
+ << induc_type;
+
+ // Generate loop control:
+ // stc = <trip-count>;
+ // vtc = <vector trip-count>
+ HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
+ HInstruction* vtc = stc;
+ vector_index_ = graph_->GetConstant(induc_type, 0);
+ bool needs_disambiguation_test = false;
+ // Generate runtime disambiguation test:
+ // vtc = a != b ? vtc : 0;
+ if (NeedsArrayRefsDisambiguationTest()) {
+ HInstruction* rt = Insert(
+ preheader,
+ new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
+ vtc = Insert(preheader,
+ new (global_allocator_)
+ HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc));
+ needs_disambiguation_test = true;
+ }
+
+ // Generate vector loop:
+ // for ( ; i < vtc; i += vector_length)
+ // <vectorized-loop-body>
+ HBasicBlock* preheader_for_vector_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ vector_mode_ = kVector;
+ GenerateNewLoopPredicated(node,
+ preheader_for_vector_loop,
+ vector_index_,
+ vtc,
+ graph_->GetConstant(induc_type, vector_length_));
+
+ // Generate scalar loop, if needed:
+ // for ( ; i < stc; i += 1)
+ // <loop-body>
+ if (needs_disambiguation_test) {
+ vector_mode_ = kSequential;
+ HBasicBlock* preheader_for_cleanup_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ // Use "Traditional" version for the sequential loop.
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_cleanup_loop,
+ vector_index_,
+ stc,
+ graph_->GetConstant(induc_type, 1),
+ LoopAnalysisInfo::kNoUnrollingFactor);
+ }
+
+ FinalizeVectorization(node);
+
+ // Assign governing predicates to the vector instructions that were inserted outside the loop
+ // during vectorization.
+ for (auto it : *vector_external_set_) {
+ DCHECK(it->IsVecOperation());
+ HVecOperation* vec_op = it->AsVecOperation();
+
+ HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
+ graph_->GetIntConstant(1),
+ vec_op->GetPackedType(),
+ vec_op->GetVectorLength(),
+ 0u);
+ vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op);
+ vec_op->SetMergingGoverningPredicate(set_pred);
+ }
+}
+
+void HLoopOptimization::VectorizeTraditional(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit,
+ int64_t trip_count) {
+ DCHECK(!IsInPredicatedVectorizationMode());
+
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -1141,7 +1375,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// A cleanup loop is needed, at least, for any unknown trip count or
// for a known trip count with remainder iterations after vectorization.
- bool needs_cleanup = !IsInPredicatedVectorizationMode() &&
+ bool needs_cleanup =
(trip_count == 0 || ((trip_count - vector_static_peeling_factor_) % chunk) != 0);
// Adjust vector bookkeeping.
@@ -1160,13 +1394,11 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// ptc = <peeling factor>;
HInstruction* ptc = nullptr;
if (vector_static_peeling_factor_ != 0) {
- DCHECK(!IsInPredicatedVectorizationMode());
// Static loop peeling for SIMD alignment (using the most suitable
// fixed peeling factor found during prior alignment analysis).
DCHECK(vector_dynamic_peeling_candidate_ == nullptr);
ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_);
} else if (vector_dynamic_peeling_candidate_ != nullptr) {
- DCHECK(!IsInPredicatedVectorizationMode());
// Dynamic loop peeling for SIMD alignment (using the most suitable
// candidate found during prior alignment analysis):
// rem = offset % ALIGN; // adjusted as #elements
@@ -1197,7 +1429,6 @@ void HLoopOptimization::Vectorize(LoopNode* node,
HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
HInstruction* vtc = stc;
if (needs_cleanup) {
- DCHECK(!IsInPredicatedVectorizationMode());
DCHECK(IsPowerOfTwo(chunk));
HInstruction* diff = stc;
if (ptc != nullptr) {
@@ -1217,7 +1448,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// Generate runtime disambiguation test:
// vtc = a != b ? vtc : 0;
- if (vector_runtime_test_a_ != nullptr) {
+ if (NeedsArrayRefsDisambiguationTest()) {
HInstruction* rt = Insert(
preheader,
new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
@@ -1235,45 +1466,52 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// moved around during suspend checks, since all analysis was based on
// nothing more than the Android runtime alignment conventions.
if (ptc != nullptr) {
- DCHECK(!IsInPredicatedVectorizationMode());
vector_mode_ = kSequential;
- GenerateNewLoop(node,
- block,
- graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
- vector_index_,
- ptc,
- graph_->GetConstant(induc_type, 1),
- LoopAnalysisInfo::kNoUnrollingFactor);
+ HBasicBlock* preheader_for_peeling_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_peeling_loop,
+ vector_index_,
+ ptc,
+ graph_->GetConstant(induc_type, 1),
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
// for ( ; i < vtc; i += chunk)
// <vectorized-loop-body>
vector_mode_ = kVector;
- GenerateNewLoop(node,
- block,
- graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
- vector_index_,
- vtc,
- graph_->GetConstant(induc_type, vector_length_), // increment per unroll
- unroll);
- HLoopInformation* vloop = vector_header_->GetLoopInformation();
+ HBasicBlock* preheader_for_vector_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_vector_loop,
+ vector_index_,
+ vtc,
+ graph_->GetConstant(induc_type, vector_length_), // per unroll
+ unroll);
// Generate cleanup loop, if needed:
// for ( ; i < stc; i += 1)
// <loop-body>
if (needs_cleanup) {
- DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), vector_runtime_test_a_ != nullptr);
vector_mode_ = kSequential;
- GenerateNewLoop(node,
- block,
- graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
- vector_index_,
- stc,
- graph_->GetConstant(induc_type, 1),
- LoopAnalysisInfo::kNoUnrollingFactor);
+ HBasicBlock* preheader_for_cleanup_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_cleanup_loop,
+ vector_index_,
+ stc,
+ graph_->GetConstant(induc_type, 1),
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
+ FinalizeVectorization(node);
+}
+
+void HLoopOptimization::FinalizeVectorization(LoopNode* node) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+ HLoopInformation* vloop = vector_header_->GetLoopInformation();
// Link reductions to their final uses.
for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
if (i->first->IsPhi()) {
@@ -1287,9 +1525,17 @@ void HLoopOptimization::Vectorize(LoopNode* node,
}
}
- // Remove the original loop by disconnecting the body block
- // and removing all instructions from the header.
- block->DisconnectAndDelete();
+ // Remove the original loop.
+ for (HBlocksInLoopPostOrderIterator it_loop(*node->loop_info);
+ !it_loop.Done();
+ it_loop.Advance()) {
+ HBasicBlock* cur_block = it_loop.Current();
+ if (cur_block == node->loop_info->GetHeader()) {
+ continue;
+ }
+ cur_block->DisconnectAndDelete();
+ }
+
while (!header->GetFirstInstruction()->IsGoto()) {
header->RemoveInstruction(header->GetFirstInstruction());
}
@@ -1301,14 +1547,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
node->loop_info = vloop;
}
-void HLoopOptimization::GenerateNewLoop(LoopNode* node,
- HBasicBlock* block,
- HBasicBlock* new_preheader,
- HInstruction* lo,
- HInstruction* hi,
- HInstruction* step,
- uint32_t unroll) {
- DCHECK(unroll == 1 || vector_mode_ == kVector);
+HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo) {
DataType::Type induc_type = lo->GetType();
// Prepare new loop.
vector_preheader_ = new_preheader,
@@ -1318,68 +1557,160 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
kNoRegNumber,
0,
HPhi::ToPhiType(induc_type));
- // Generate header and prepare body.
- // for (i = lo; i < hi; i += step)
- // <loop-body>
- HInstruction* cond = nullptr;
- HInstruction* set_pred = nullptr;
- if (IsInPredicatedVectorizationMode()) {
- HVecPredWhile* pred_while =
- new (global_allocator_) HVecPredWhile(global_allocator_,
- phi,
- hi,
- HVecPredWhile::CondKind::kLO,
- DataType::Type::kInt32,
- vector_length_,
- 0u);
-
- cond = new (global_allocator_) HVecPredCondition(global_allocator_,
- pred_while,
- HVecPredCondition::PCondKind::kNFirst,
- DataType::Type::kInt32,
- vector_length_,
- 0u);
-
- vector_header_->AddPhi(phi);
- vector_header_->AddInstruction(pred_while);
- vector_header_->AddInstruction(cond);
- set_pred = pred_while;
- } else {
- cond = new (global_allocator_) HAboveOrEqual(phi, hi);
- vector_header_->AddPhi(phi);
- vector_header_->AddInstruction(cond);
- }
+ vector_header_->AddPhi(phi);
+ vector_index_ = phi;
+ vector_permanent_map_->clear();
+ vector_external_set_->clear();
+ predicate_info_map_->clear();
+
+ return phi;
+}
+void HLoopOptimization::GenerateNewLoopScalarOrTraditional(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step,
+ uint32_t unroll) {
+ DCHECK(unroll == 1 || vector_mode_ == kVector);
+ DataType::Type induc_type = lo->GetType();
+ HPhi* phi = InitializeForNewLoop(new_preheader, lo);
+
+ // Generate loop exit check.
+ HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi);
+ vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
- vector_index_ = phi;
- vector_permanent_map_->clear(); // preserved over unrolling
+
for (uint32_t u = 0; u < unroll; u++) {
- // Generate instruction map.
- vector_map_->clear();
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ GenerateNewLoopBodyOnce(node, induc_type, step);
+ }
+
+ FinalizePhisForNewLoop(phi, lo);
+}
+
+void HLoopOptimization::GenerateNewLoopPredicated(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step) {
+ DCHECK(IsInPredicatedVectorizationMode());
+ DCHECK_EQ(vector_mode_, kVector);
+ DataType::Type induc_type = lo->GetType();
+ HPhi* phi = InitializeForNewLoop(new_preheader, lo);
+
+ // Generate loop exit check.
+ HVecPredWhile* pred_while =
+ new (global_allocator_) HVecPredWhile(global_allocator_,
+ phi,
+ hi,
+ HVecPredWhile::CondKind::kLO,
+ DataType::Type::kInt32,
+ vector_length_,
+ 0u);
+
+ HInstruction* cond =
+ new (global_allocator_) HVecPredToBoolean(global_allocator_,
+ pred_while,
+ HVecPredToBoolean::PCondKind::kNFirst,
+ DataType::Type::kInt32,
+ vector_length_,
+ 0u);
+
+ vector_header_->AddInstruction(pred_while);
+ vector_header_->AddInstruction(cond);
+ vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
+
+ PreparePredicateInfoMap(node);
+ GenerateNewLoopBodyOnce(node, induc_type, step);
+ InitPredicateInfoMap(node, pred_while);
+
+ // Assign governing predicates for instructions in the loop; the traversal order doesn't matter.
+ for (HBlocksInLoopIterator block_it(*node->loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+
+ for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) {
+ auto i = vector_map_->find(it.Current());
+ if (i != vector_map_->end()) {
+ HInstruction* instr = i->second;
+
+ if (!instr->IsVecOperation()) {
+ continue;
+ }
+ // There are cases when a vector instruction, which corresponds to some instruction in the
+ // original scalar loop, is located not in the newly created vector loop but
+ // in the vector loop preheader (and hence recorded in vector_external_set_).
+ //
+ // Governing predicates will be set for such instructions separately.
+ bool in_vector_loop = vector_header_->GetLoopInformation()->Contains(*instr->GetBlock());
+ DCHECK_IMPLIES(!in_vector_loop,
+ vector_external_set_->find(instr) != vector_external_set_->end());
+
+ if (in_vector_loop &&
+ !instr->AsVecOperation()->IsPredicated()) {
+ HVecOperation* op = instr->AsVecOperation();
+ HVecPredSetOperation* pred = predicate_info_map_->Get(cur_block)->GetControlPredicate();
+ op->SetMergingGoverningPredicate(pred);
+ }
+ }
+ }
+ }
+
+ FinalizePhisForNewLoop(phi, lo);
+}
+
+void HLoopOptimization::GenerateNewLoopBodyOnce(LoopNode* node,
+ DataType::Type induc_type,
+ HInstruction* step) {
+ // Generate instruction map.
+ vector_map_->clear();
+ HLoopInformation* loop_info = node->loop_info;
+
+ // Traverse the data flow of the loop, in the original program order.
+ for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+
+ if (cur_block == loop_info->GetHeader()) {
+ continue;
+ }
+
+ for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
}
- // Generate body from the instruction map, but in original program order.
- HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ }
+
+ // Generate body from the instruction map, in the original program order.
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
+ for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+
+ if (cur_block == loop_info->GetHeader()) {
+ continue;
+ }
+
+ for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) {
auto i = vector_map_->find(it.Current());
if (i != vector_map_->end() && !i->second->IsInBlock()) {
Insert(vector_body_, i->second);
- if (IsInPredicatedVectorizationMode() && i->second->IsVecOperation()) {
- HVecOperation* op = i->second->AsVecOperation();
- op->SetMergingGoverningPredicate(set_pred);
- }
// Deal with instructions that need an environment, such as the scalar intrinsics.
if (i->second->NeedsEnvironment()) {
i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
}
}
}
- // Generate the induction.
- vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
- Insert(vector_body_, vector_index_);
}
+ // Generate the induction.
+ vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
+ Insert(vector_body_, vector_index_);
+}
+
+void HLoopOptimization::FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo) {
// Finalize phi inputs for the reductions (if any).
for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
if (!i->first->IsPhi()) {
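The loop control emitted by GenerateNewLoopPredicated() above can be pictured with a scalar model; the meanings of CondKind::kLO and PCondKind::kNFirst below are assumptions made for illustration, not definitions from the patch:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kLanesSketch = 4;

// Assumed meaning of HVecPredWhile with CondKind::kLO: lane k stays active
// while (index + k) is lower than the trip count.
std::array<bool, kLanesSketch> PredWhileLoSketch(int64_t index, int64_t trip_count) {
  std::array<bool, kLanesSketch> pred{};
  for (std::size_t k = 0; k < kLanesSketch; ++k) {
    pred[k] = (index + static_cast<int64_t>(k)) < trip_count;
  }
  return pred;
}

// Assumed meaning of HVecPredToBoolean with PCondKind::kNFirst: true when the
// first lane is no longer active, i.e. the loop-exit condition fed into HIf.
bool PredNFirstSketch(const std::array<bool, kLanesSketch>& pred) {
  return !pred[0];
}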
@@ -1442,10 +1773,13 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) ||
(TrySetVectorType(type, &restrictions) &&
VectorizeUse(node, instruction, generate_code, type, restrictions))) {
+ DCHECK(!instruction->IsPhi());
if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
+ HInstruction* new_red_vec_op = vector_map_->Get(instruction);
+ HInstruction* original_phi = redit->second;
+ DCHECK(original_phi->IsPhi());
+ vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi));
+ vector_permanent_map_->Overwrite(original_phi, new_red_vec_op);
}
return true;
}
@@ -1455,6 +1789,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
if (instruction->IsGoto()) {
return true;
}
+
+ if (instruction->IsIf()) {
+ return VectorizeIfCondition(node, instruction, generate_code, restrictions);
+ }
// Otherwise accept only expressions with no effects outside the immediate loop-body.
// Note that actual uses are inspected during right-hand-side tree traversal.
return !IsUsedOutsideLoop(node->loop_info, instruction)
@@ -1485,9 +1823,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
// Deal with vector restrictions.
bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt();
- if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) ||
- IsInPredicatedVectorizationMode())) {
- // TODO: Support CharAt for predicated mode.
+ if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) {
return false;
}
// Accept a right-hand-side array base[index] for
@@ -1676,6 +2012,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case InstructionSet::kThumb2:
// Allow vectorization for all ARM devices, because Android assumes that
// ARM 32-bit always supports advanced SIMD (64-bit SIMD).
+ *restrictions |= kNoIfCond;
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -1701,6 +2038,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u);
switch (type) {
case DataType::Type::kBool:
+ *restrictions |= kNoDiv |
+ kNoSignedHAdd |
+ kNoUnsignedHAdd |
+ kNoUnroundedHAdd |
+ kNoSAD |
+ kNoIfCond;
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kUint8:
case DataType::Type::kInt8:
*restrictions |= kNoDiv |
@@ -1712,6 +2056,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kUint16:
case DataType::Type::kInt16:
*restrictions |= kNoDiv |
+ kNoStringCharAt | // TODO: support in predicated mode.
kNoSignedHAdd |
kNoUnsignedHAdd |
kNoUnroundedHAdd |
@@ -1722,13 +2067,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(type, vector_length);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoSAD;
+ *restrictions |= kNoDiv | kNoSAD | kNoIfCond;
return TrySetVectorLength(type, vector_length);
case DataType::Type::kFloat32:
- *restrictions |= kNoReduction;
+ *restrictions |= kNoReduction | kNoIfCond;
return TrySetVectorLength(type, vector_length);
case DataType::Type::kFloat64:
- *restrictions |= kNoReduction;
+ *restrictions |= kNoReduction | kNoIfCond;
return TrySetVectorLength(type, vector_length);
default:
break;
@@ -1737,6 +2082,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
} else {
// Allow vectorization for all ARM devices, because Android assumes that
// ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
+ *restrictions |= kNoIfCond;
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -1767,6 +2113,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case InstructionSet::kX86:
case InstructionSet::kX86_64:
// Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD).
+ *restrictions |= kNoIfCond;
if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
switch (type) {
case DataType::Type::kBool:
@@ -1855,15 +2202,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) {
vector = new (global_allocator_)
HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc);
vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length_,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, vector);
- vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(vector);
}
vector_map_->Put(org, vector);
}
@@ -1936,18 +2275,18 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
+void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) {
+ DCHECK(reductions_->find(orig_phi) != reductions_->end());
+ DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi);
HInstruction* vector = nullptr;
if (vector_mode_ == kSequential) {
HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
+ global_allocator_, kNoRegNumber, 0, orig_phi->GetType());
vector_header_->AddPhi(new_phi);
vector = new_phi;
} else {
// Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
+ auto it = vector_permanent_map_->find(orig_phi);
if (it != vector_permanent_map_->end()) {
vector = it->second;
} else {
@@ -1957,7 +2296,7 @@ void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
vector = new_phi;
}
}
- vector_map_->Put(phi, vector);
+ vector_map_->Put(orig_phi, vector);
}
void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
@@ -1992,15 +2331,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r
vector_length,
kNoDexPc));
}
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, new_init);
- new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(new_init);
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
@@ -2026,23 +2357,15 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
// x = REDUCE( [x_1, .., x_n] )
// y = x_1
// along the exit of the defining loop.
- HInstruction* reduce = new (global_allocator_) HVecReduce(
+ HVecReduce* reduce = new (global_allocator_) HVecReduce(
global_allocator_, instruction, type, vector_length, kind, kNoDexPc);
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
+ vector_external_set_->insert(reduce);
instruction = new (global_allocator_) HVecExtractScalar(
global_allocator_, reduce, type, vector_length, 0, kNoDexPc);
exit->InsertInstructionAfter(instruction, reduce);
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- exit->InsertInstructionBefore(set_pred, reduce);
- reduce->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(instruction);
}
}
return instruction;
@@ -2057,10 +2380,10 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
} \
break;
-void HLoopOptimization::GenerateVecOp(HInstruction* org,
- HInstruction* opa,
- HInstruction* opb,
- DataType::Type type) {
+HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ DataType::Type type) {
uint32_t dex_pc = org->GetDexPc();
HInstruction* vector = nullptr;
DataType::Type org_type = org->GetType();
@@ -2130,11 +2453,23 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
new (global_allocator_) HAbs(org_type, opa, dex_pc));
+ case HInstruction::kEqual: {
+ // Special case.
+ if (vector_mode_ == kVector) {
+ vector = new (global_allocator_) HVecCondition(
+ global_allocator_, opa, opb, type, vector_length_, dex_pc);
+ } else {
+ DCHECK(vector_mode_ == kSequential);
+ UNREACHABLE();
+ }
+ }
+ break;
default:
break;
} // switch
CHECK(vector != nullptr) << "Unsupported SIMD operator";
vector_map_->Put(org, vector);
+ return vector;
}
#undef GENERATE_VEC
@@ -2374,6 +2709,89 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node,
return false;
}
+bool HLoopOptimization::VectorizeIfCondition(LoopNode* node,
+ HInstruction* hif,
+ bool generate_code,
+ uint64_t restrictions) {
+ DCHECK(hif->IsIf());
+ HInstruction* if_input = hif->InputAt(0);
+
+ if (!if_input->HasOnlyOneNonEnvironmentUse()) {
+ // Avoid the complications of the condition being used as a materialized boolean.
+ return false;
+ }
+
+ if (!if_input->IsEqual()) {
+ // TODO: Support other condition types.
+ return false;
+ }
+
+ HCondition* cond = if_input->AsCondition();
+ HInstruction* opa = cond->InputAt(0);
+ HInstruction* opb = cond->InputAt(1);
+ DataType::Type type = GetNarrowerType(opa, opb);
+
+ if (!DataType::IsIntegralType(type)) {
+ return false;
+ }
+
+ bool is_unsigned = false;
+ HInstruction* opa_promoted = opa;
+ HInstruction* opb_promoted = opb;
+ bool is_int_case = DataType::Type::kInt32 == opa->GetType() &&
+ DataType::Type::kInt32 == opb->GetType();
+
+ // Condition arguments should be either both int32 or consistently extended signed/unsigned
+ // narrower operands.
+ if (!is_int_case &&
+ !IsNarrowerOperands(opa, opb, type, &opa_promoted, &opb_promoted, &is_unsigned)) {
+ return false;
+ }
+ type = HVecOperation::ToProperType(type, is_unsigned);
+
+ // For narrow types, explicit type conversion may have been
+ // optimized away, so set the no hi bits restriction here.
+ if (DataType::Size(type) <= 2) {
+ restrictions |= kNoHiBits;
+ }
+
+ if (!TrySetVectorType(type, &restrictions) ||
+ HasVectorRestrictions(restrictions, kNoIfCond)) {
+ return false;
+ }
+
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ opa_promoted = opa;
+ opb_promoted = opb;
+ }
+
+ if (VectorizeUse(node, opa_promoted, generate_code, type, restrictions) &&
+ VectorizeUse(node, opb_promoted, generate_code, type, restrictions)) {
+ if (generate_code) {
+ HInstruction* vec_cond = GenerateVecOp(cond,
+ vector_map_->Get(opa_promoted),
+ vector_map_->Get(opb_promoted),
+ type);
+
+ if (vector_mode_ == kVector) {
+ HInstruction* vec_pred_not = new (global_allocator_) HVecPredNot(
+ global_allocator_, vec_cond, type, vector_length_, hif->GetDexPc());
+
+ vector_map_->Put(hif, vec_pred_not);
+ BlockPredicateInfo* pred_info = predicate_info_map_->Get(hif->GetBlock());
+ pred_info->SetControlFlowInfo(vec_cond->AsVecPredSetOperation(),
+ vec_pred_not->AsVecPredSetOperation());
+ } else {
+ DCHECK(vector_mode_ == kSequential);
+ UNREACHABLE();
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
+
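A hypothetical source-level view (not from the patch) of the if-conversion that VectorizeIfCondition() enables, written as a scalar sketch:

// The branch on b[i] == 0 becomes a pair of complementary per-lane predicates:
// the HVecCondition result guards the true arm and its HVecPredNot guards the
// false arm, turning both arms into straight-line predicated vector code.
void IfConvertedSketch(int* a, const int* b, int n) {
  for (int i = 0; i < n; ++i) {
    const bool p = (b[i] == 0);  // "true" predicate (HVecCondition)
    const bool not_p = !p;       // "false" predicate (HVecPredNot)
    if (p) {
      a[i] += 1;                 // true arm, executed under p
    }
    if (not_p) {
      a[i] -= 1;                 // false arm, executed under !p
    }
  }
}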
//
// Vectorization heuristics.
//
@@ -2423,6 +2841,8 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
// TODO: trip count is really unsigned entity, provided the guarding test
// is satisfied; deal with this more carefully later
uint32_t max_peel = MaxNumberPeeled();
+ // Peeling is not supported in predicated mode.
+ DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), max_peel == 0u);
if (vector_length_ == 0) {
return false; // nothing found
} else if (trip_count < 0) {
@@ -2686,4 +3106,67 @@ bool HLoopOptimization::CanRemoveCycle() {
return true;
}
+void HLoopOptimization::PreparePredicateInfoMap(LoopNode* node) {
+ HLoopInformation* loop_info = node->loop_info;
+
+ DCHECK(IsPredicatedLoopControlFlowSupported(loop_info));
+
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+ BlockPredicateInfo* pred_info = new (loop_allocator_) BlockPredicateInfo();
+
+ predicate_info_map_->Put(cur_block, pred_info);
+ }
+}
+
+void HLoopOptimization::InitPredicateInfoMap(LoopNode* node,
+ HVecPredSetOperation* loop_main_pred) {
+ HLoopInformation* loop_info = node->loop_info;
+ HBasicBlock* header = loop_info->GetHeader();
+ BlockPredicateInfo* header_info = predicate_info_map_->Get(header);
+ // Loop header is a special case; it doesn't have a false predicate because we
+ // would just exit the loop then.
+ header_info->SetControlFlowInfo(loop_main_pred, loop_main_pred);
+
+ size_t blocks_in_loop = header->GetLoopInformation()->GetBlocks().NumSetBits();
+ if (blocks_in_loop == 2) {
+ for (HBasicBlock* successor : header->GetSuccessors()) {
+ if (loop_info->Contains(*successor)) {
+ // This is the loop's second block - the body.
+ BlockPredicateInfo* body_info = predicate_info_map_->Get(successor);
+ body_info->SetControlPredicate(loop_main_pred);
+ return;
+ }
+ }
+ UNREACHABLE();
+ }
+
+ // TODO: support predicated vectorization of CF loops of more complex structure.
+ DCHECK(HasLoopDiamondStructure(loop_info));
+ HBasicBlock* header_succ_0 = header->GetSuccessors()[0];
+ HBasicBlock* header_succ_1 = header->GetSuccessors()[1];
+ HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ?
+ header_succ_0 :
+ header_succ_1;
+
+ HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf();
+ HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor();
+ HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor();
+ HBasicBlock* back_edge = diamond_true->GetSingleSuccessor();
+
+ BlockPredicateInfo* diamond_top_info = predicate_info_map_->Get(diamond_top);
+ BlockPredicateInfo* diamond_true_info = predicate_info_map_->Get(diamond_true);
+ BlockPredicateInfo* diamond_false_info = predicate_info_map_->Get(diamond_false);
+ BlockPredicateInfo* back_edge_info = predicate_info_map_->Get(back_edge);
+
+ diamond_top_info->SetControlPredicate(header_info->GetTruePredicate());
+
+ diamond_true_info->SetControlPredicate(diamond_top_info->GetTruePredicate());
+ diamond_false_info->SetControlPredicate(diamond_top_info->GetFalsePredicate());
+
+ back_edge_info->SetControlPredicate(header_info->GetTruePredicate());
+}
+
} // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6dd778ba74..86a9f0fcb8 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -101,6 +101,7 @@ class HLoopOptimization : public HOptimization {
kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening
kNoDotProd = 1 << 13, // no dot product
+ kNoIfCond = 1 << 14, // no if condition conversion
};
/*
@@ -136,6 +137,95 @@ class HLoopOptimization : public HOptimization {
bool is_string_char_at; // compressed string read
};
+ // This structure describes the control flow (CF) -> data flow (DF) conversion of the loop
+ // with control flow (see below) for the purpose of predicated autovectorization.
+ //
+ // Let's define "loops without control-flow" (or non-CF loops) as loops with two consecutive
+ // blocks and without any branching structure except for the loop exit, and
+ // "loops with control-flow" (or CF-loops) as all other loops.
+ //
+ // In the execution of the original CF-loop on each iteration some basic block Y will be
+ // either executed or not executed, depending on the control flow of the loop. More
+ // specifically, a block will be executed if all the conditional branches of the nodes in
+ // the control dependency graph for that block Y are taken according to the path from the loop
+ // header to that basic block.
+ //
+ // This is the key idea of CF->DF conversion: a boolean value
+ // 'ctrl_pred = cond1 && cond2 && ...' determines whether the basic block Y will be
+ // executed, where cond_K records whether the branch of node K, met during the upward
+ // traversal of the control dependency graph, was taken in the 'right' direction.
+ //
+ // Def.: BB Y is control dependent on BB X iff
+ // (1) there exists a directed path P from X to Y with any basic block Z in P (excluding X
+ // and Y) post-dominated by Y and
+ // (2) X is not post-dominated by Y.
+ //            ...
+ //             X
+ //    false  /   \  true
+ //          /     \
+ //        ...
+ //         |
+ //         Y
+ //        ...
+ //
+ // When doing predicated autovectorization of a CF loop, we use the CF->DF conversion approach:
+ // 1) do the data analysis and vector operation creation as if it was a non-CF loop.
+ // 2) for each HIf block create two vector predicate setting instructions - for True and False
+ // edges/paths.
+ // 3) assign a governing vector predicate (see comments near HVecPredSetOperation)
+ // to each vector operation Alpha in the loop (including to those vector predicate setting
+ // instructions created in #2); do this by:
+ // - finding the block on which the instruction Alpha's block is immediately control dependent.
+ // - choosing the True or False predicate setting instruction (created in #2) depending
+ // on the path to the instruction.
+ //
+ // For more information check the papers:
+ //
+ // - Allen, John R and Kennedy, Ken and Porterfield, Carrie and Warren, Joe,
+ // “Conversion of Control Dependence to Data Dependence,” in Proceedings of the 10th ACM
+ // SIGACT-SIGPLAN Symposium on Principles of Programming Languages, 1983, pp. 177–189.
+ // - JEANNE FERRANTE, KARL J. OTTENSTEIN, JOE D. WARREN,
+ // "The Program Dependence Graph and Its Use in Optimization"
+ //
+ class BlockPredicateInfo : public ArenaObject<kArenaAllocLoopOptimization> {
+ public:
+ BlockPredicateInfo() :
+ control_predicate_(nullptr),
+ true_predicate_(nullptr),
+ false_predicate_(nullptr) {}
+
+ void SetControlFlowInfo(HVecPredSetOperation* true_predicate,
+ HVecPredSetOperation* false_predicate) {
+ DCHECK(!HasControlFlowOps());
+ true_predicate_ = true_predicate;
+ false_predicate_ = false_predicate;
+ }
+
+ bool HasControlFlowOps() const {
+ // Note: a block must have both T/F predicates set or none of them.
+ DCHECK_EQ(true_predicate_ == nullptr, false_predicate_ == nullptr);
+ return true_predicate_ != nullptr;
+ }
+
+ HVecPredSetOperation* GetControlPredicate() const { return control_predicate_; }
+ void SetControlPredicate(HVecPredSetOperation* control_predicate) {
+ control_predicate_ = control_predicate;
+ }
+
+ HVecPredSetOperation* GetTruePredicate() const { return true_predicate_; }
+ HVecPredSetOperation* GetFalsePredicate() const { return false_predicate_; }
+
+ private:
+ // Vector control predicate operation, associated with the block, which determines
+ // the active lanes for all vector operations originating from this block.
+ HVecPredSetOperation* control_predicate_;
+
+ // Vector predicate instruction, associated with the true successor of the block.
+ HVecPredSetOperation* true_predicate_;
+ // Vector predicate instruction, associated with the false successor of the block.
+ HVecPredSetOperation* false_predicate_;
+ };
+
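To make the CF->DF description above concrete, here is a minimal sketch (assumed semantics, invented names) of how the control predicates compose for the diamond loop that InitPredicateInfoMap() handles:

// Per lane, a block is active when every branch on the path from the header is
// taken towards it; the back edge merges both arms and is therefore governed
// by the header predicate alone, matching the assignments in
// InitPredicateInfoMap().
struct LanePredicatesSketch {
  bool header_active;  // loop_main_pred: the lane is within the trip count
  bool diamond_cond;   // vectorized condition of diamond_hif for this lane
};

bool DiamondTrueActiveSketch(const LanePredicatesSketch& p) {
  return p.header_active && p.diamond_cond;
}

bool DiamondFalseActiveSketch(const LanePredicatesSketch& p) {
  return p.header_active && !p.diamond_cond;
}

bool BackEdgeActiveSketch(const LanePredicatesSketch& p) {
  return p.header_active;
}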
//
// Loop setup and traversal.
//
@@ -203,15 +293,95 @@ class HLoopOptimization : public HOptimization {
// Vectorization analysis and synthesis.
//
- bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
- void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
- void GenerateNewLoop(LoopNode* node,
- HBasicBlock* block,
- HBasicBlock* new_preheader,
- HInstruction* lo,
- HInstruction* hi,
- HInstruction* step,
- uint32_t unroll);
+ // Returns whether the data flow requirements are met for vectorization.
+ //
+ // - checks whether instructions are vectorizable for the target.
+ // - conducts data dependence analysis for array references.
+ // - additionally, collects info on peeling and alignment strategy.
+ bool CanVectorizeDataFlow(LoopNode* node, HBasicBlock* header, bool collect_alignment_info);
+
+ // Performs the checks (common to predicated and traditional modes) for the loop.
+ bool ShouldVectorizeCommon(LoopNode* node, HPhi* main_phi, int64_t trip_count);
+
+ // Tries to vectorize the loop; returns whether it was successful.
+ //
+ // There are two versions/algorithms:
+ // - Predicated: all the vector operations have governing predicates which control
+ // which individual vector lanes will be active (see HVecPredSetOperation for more details).
+ // Example: vectorization using AArch64 SVE.
+ // - Traditional: a regular mode in which all vector operation lanes are unconditionally
+ // active.
+ // Example: vectorization using AArch64 NEON.
+ bool TryVectorizePredicated(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count);
+
+ bool TryVectorizedTraditional(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count);
+
+ // Vectorizes the loop for which all checks have been already done.
+ void VectorizePredicated(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit);
+ void VectorizeTraditional(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit,
+ int64_t trip_count);
+
+ // Performs the final steps of the whole vectorization process: links reductions, removes the original
+ // scalar loop, updates loop info.
+ void FinalizeVectorization(LoopNode* node);
+
+ // Helpers that do the vector instruction synthesis for the previously created loop; create
+ // and fill the loop body with instructions.
+ //
+ // A version to generate a vector loop in predicated mode.
+ void GenerateNewLoopPredicated(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step);
+
+ // A version to generate a vector loop in traditional mode or to generate
+ // a scalar loop for both modes.
+ void GenerateNewLoopScalarOrTraditional(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step,
+ uint32_t unroll);
+
+ //
+ // Helpers for GenerateNewLoop*.
+ //
+
+ // Updates vectorization bookkeeping data for the new loop, creates and returns
+ // its main induction Phi.
+ HPhi* InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo);
+
+ // Finalizes reduction and induction phis' inputs for the newly created loop.
+ void FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo);
+
+ // Creates an empty predicate info object for each basic block and puts it into the map.
+ void PreparePredicateInfoMap(LoopNode* node);
+
+ // Sets up block true/false predicates using info collected through data flow and control
+ // dependency analysis.
+ void InitPredicateInfoMap(LoopNode* node, HVecPredSetOperation* loop_main_pred);
+
+ // Performs instruction synthesis for the loop body.
+ void GenerateNewLoopBodyOnce(LoopNode* node,
+ DataType::Type induc_type,
+ HInstruction* step);
+
+ // Returns whether the vector loop needs a runtime disambiguation test for array refs.
+ bool NeedsArrayRefsDisambiguationTest() const { return vector_runtime_test_a_ != nullptr; }
+
bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
bool VectorizeUse(LoopNode* node,
HInstruction* instruction,
@@ -239,10 +409,10 @@ class HLoopOptimization : public HOptimization {
void GenerateVecReductionPhi(HPhi* phi);
void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
- void GenerateVecOp(HInstruction* org,
- HInstruction* opa,
- HInstruction* opb,
- DataType::Type type);
+ HInstruction* GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ DataType::Type type);
// Vectorization idioms.
bool VectorizeSaturationIdiom(LoopNode* node,
@@ -265,6 +435,10 @@ class HLoopOptimization : public HOptimization {
bool generate_code,
DataType::Type type,
uint64_t restrictions);
+ bool VectorizeIfCondition(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ uint64_t restrictions);
// Vectorization heuristics.
Alignment ComputeAlignment(HInstruction* offset,
@@ -369,6 +543,16 @@ class HLoopOptimization : public HOptimization {
// Contents reside in phase-local heap memory.
ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
+ // Tracks vector operations that are inserted outside of the loop (preheader, exit)
+ // as part of vectorization (e.g. scalar replication for loop invariants and reduce ops
+ // for loop reductions).
+ ScopedArenaSet<HInstruction*>* vector_external_set_;
+
+ // A mapping between a basic block of the original loop and its associated PredicateInfo.
+ //
+ // Only used in predicated loop vectorization mode.
+ ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*>* predicate_info_map_;
+
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 7f694fb655..49e3c0418f 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -30,6 +30,7 @@ namespace art HIDDEN {
class LoopOptimizationTest : public OptimizingUnitTest {
protected:
void SetUp() override {
+ TEST_SETUP_DISABLED_FOR_RISCV64();
OptimizingUnitTest::SetUp();
graph_ = CreateGraph();
@@ -44,6 +45,7 @@ class LoopOptimizationTest : public OptimizingUnitTest {
}
void TearDown() override {
+ TEST_TEARDOWN_DISABLED_FOR_RISCV64();
codegen_.reset();
compiler_options_.reset();
graph_ = nullptr;
@@ -134,17 +136,20 @@ class LoopOptimizationTest : public OptimizingUnitTest {
//
TEST_F(LoopOptimizationTest, NoLoops) {
+ TEST_DISABLED_FOR_RISCV64();
PerformAnalysis();
EXPECT_EQ("", LoopStructure());
}
TEST_F(LoopOptimizationTest, SingleLoop) {
+ TEST_DISABLED_FOR_RISCV64();
AddLoop(entry_block_, return_block_);
PerformAnalysis();
EXPECT_EQ("[]", LoopStructure());
}
TEST_F(LoopOptimizationTest, LoopNest10) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -156,6 +161,7 @@ TEST_F(LoopOptimizationTest, LoopNest10) {
}
TEST_F(LoopOptimizationTest, LoopSequence10) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -167,6 +173,7 @@ TEST_F(LoopOptimizationTest, LoopSequence10) {
}
TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -194,6 +201,7 @@ TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
}
TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -215,6 +223,7 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
//
// This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
+ TEST_DISABLED_FOR_RISCV64();
// Can't use AddLoop as we want special order for blocks predecessors.
HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_);
@@ -260,6 +269,7 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
//
// This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* header = AddLoop(entry_block_, return_block_);
header->InsertInstructionBefore(
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3790058879..5795ea7ca9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -36,6 +36,7 @@
#include "code_generator.h"
#include "common_dominator.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "mirror/class-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "ssa_builder.h"
@@ -1488,12 +1489,12 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1,
const HInstruction* instruction2) const {
DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock());
for (HInstructionIterator it(*this); !it.Done(); it.Advance()) {
- if (it.Current() == instruction1) {
- return true;
- }
if (it.Current() == instruction2) {
return false;
}
+ if (it.Current() == instruction1) {
+ return true;
+ }
}
LOG(FATAL) << "Did not find an order between two instructions of the same block.";
UNREACHABLE();
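
The reordering above changes the answer for the degenerate case where both arguments are the same instruction: the `instruction2` check now runs first, so an instruction is no longer reported as being before itself. A standalone sketch of the scan (not ART code):

    #include <cassert>
    #include <vector>

    // Simplified model of the reordered FoundBefore() scan; not ART code.
    bool FoundBefore(const std::vector<int>& block, int first, int second) {
      for (int current : block) {
        if (current == second) return false;  // checked first after this change
        if (current == first) return true;
      }
      assert(false && "both instructions must belong to the block");
      return false;
    }

    int main() {
      std::vector<int> block = {10, 20, 30};
      assert(FoundBefore(block, 10, 30));   // 10 comes before 30
      assert(!FoundBefore(block, 20, 20));  // an instruction is not before itself
      return 0;
    }
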
@@ -1815,10 +1816,12 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) {
}
}
-HConstant* HTypeConversion::TryStaticEvaluation() const {
- HGraph* graph = GetBlock()->GetGraph();
- if (GetInput()->IsIntConstant()) {
- int32_t value = GetInput()->AsIntConstant()->GetValue();
+HConstant* HTypeConversion::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); }
+
+HConstant* HTypeConversion::TryStaticEvaluation(HInstruction* input) const {
+ HGraph* graph = input->GetBlock()->GetGraph();
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt8:
return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc());
@@ -1837,8 +1840,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
default:
return nullptr;
}
- } else if (GetInput()->IsLongConstant()) {
- int64_t value = GetInput()->AsLongConstant()->GetValue();
+ } else if (input->IsLongConstant()) {
+ int64_t value = input->AsLongConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt8:
return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc());
@@ -1857,8 +1860,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
default:
return nullptr;
}
- } else if (GetInput()->IsFloatConstant()) {
- float value = GetInput()->AsFloatConstant()->GetValue();
+ } else if (input->IsFloatConstant()) {
+ float value = input->AsFloatConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt32:
if (std::isnan(value))
@@ -1881,8 +1884,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
default:
return nullptr;
}
- } else if (GetInput()->IsDoubleConstant()) {
- double value = GetInput()->AsDoubleConstant()->GetValue();
+ } else if (input->IsDoubleConstant()) {
+ double value = input->AsDoubleConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt32:
if (std::isnan(value))
@@ -1909,41 +1912,47 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
return nullptr;
}
-HConstant* HUnaryOperation::TryStaticEvaluation() const {
- if (GetInput()->IsIntConstant()) {
- return Evaluate(GetInput()->AsIntConstant());
- } else if (GetInput()->IsLongConstant()) {
- return Evaluate(GetInput()->AsLongConstant());
+HConstant* HUnaryOperation::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); }
+
+HConstant* HUnaryOperation::TryStaticEvaluation(HInstruction* input) const {
+ if (input->IsIntConstant()) {
+ return Evaluate(input->AsIntConstant());
+ } else if (input->IsLongConstant()) {
+ return Evaluate(input->AsLongConstant());
} else if (kEnableFloatingPointStaticEvaluation) {
- if (GetInput()->IsFloatConstant()) {
- return Evaluate(GetInput()->AsFloatConstant());
- } else if (GetInput()->IsDoubleConstant()) {
- return Evaluate(GetInput()->AsDoubleConstant());
+ if (input->IsFloatConstant()) {
+ return Evaluate(input->AsFloatConstant());
+ } else if (input->IsDoubleConstant()) {
+ return Evaluate(input->AsDoubleConstant());
}
}
return nullptr;
}
HConstant* HBinaryOperation::TryStaticEvaluation() const {
- if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) {
- return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant());
- } else if (GetLeft()->IsLongConstant()) {
- if (GetRight()->IsIntConstant()) {
+ return TryStaticEvaluation(GetLeft(), GetRight());
+}
+
+HConstant* HBinaryOperation::TryStaticEvaluation(HInstruction* left, HInstruction* right) const {
+ if (left->IsIntConstant() && right->IsIntConstant()) {
+ return Evaluate(left->AsIntConstant(), right->AsIntConstant());
+ } else if (left->IsLongConstant()) {
+ if (right->IsIntConstant()) {
// The binop(long, int) case is only valid for shifts and rotations.
DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName();
- return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant());
- } else if (GetRight()->IsLongConstant()) {
- return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant());
+ return Evaluate(left->AsLongConstant(), right->AsIntConstant());
+ } else if (right->IsLongConstant()) {
+ return Evaluate(left->AsLongConstant(), right->AsLongConstant());
}
- } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
+ } else if (left->IsNullConstant() && right->IsNullConstant()) {
// The binop(null, null) case is only valid for equal and not-equal conditions.
DCHECK(IsEqual() || IsNotEqual()) << DebugName();
- return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
+ return Evaluate(left->AsNullConstant(), right->AsNullConstant());
} else if (kEnableFloatingPointStaticEvaluation) {
- if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) {
- return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant());
- } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) {
- return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant());
+ if (left->IsFloatConstant() && right->IsFloatConstant()) {
+ return Evaluate(left->AsFloatConstant(), right->AsFloatConstant());
+ } else if (left->IsDoubleConstant() && right->IsDoubleConstant()) {
+ return Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant());
}
}
return nullptr;
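
The refactoring above keeps the parameterless TryStaticEvaluation() as a thin wrapper and adds overloads taking explicit operands, so a caller can ask "what would this operation fold to with these inputs?" without mutating the graph. A toy model of the delegation pattern (not ART code):

    #include <cassert>
    #include <optional>

    // Toy model of the TryStaticEvaluation() delegation pattern; not ART code.
    struct AddOp {
      std::optional<int> left;
      std::optional<int> right;  // nullopt models a non-constant input

      std::optional<int> TryStaticEvaluation() const {
        return TryStaticEvaluation(left, right);
      }
      std::optional<int> TryStaticEvaluation(std::optional<int> l,
                                             std::optional<int> r) const {
        if (l.has_value() && r.has_value()) {
          return *l + *r;
        }
        return std::nullopt;  // not statically evaluable
      }
    };

    int main() {
      AddOp add{std::nullopt, 2};
      assert(!add.TryStaticEvaluation().has_value());      // its own input is unknown
      assert(add.TryStaticEvaluation(3, 2).value() == 5);  // evaluate substituted inputs
      return 0;
    }
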
@@ -2797,8 +2806,11 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasMonitorOperations()) {
outer_graph->SetHasMonitorOperations(true);
}
- if (HasSIMD()) {
- outer_graph->SetHasSIMD(true);
+ if (HasTraditionalSIMD()) {
+ outer_graph->SetHasTraditionalSIMD(true);
+ }
+ if (HasPredicatedSIMD()) {
+ outer_graph->SetHasPredicatedSIMD(true);
}
if (HasAlwaysThrowingInvokes()) {
outer_graph->SetHasAlwaysThrowingInvokes(true);
@@ -3026,9 +3038,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
replacement = outer_graph->GetDoubleConstant(
current->AsDoubleConstant()->GetValue(), current->GetDexPc());
} else if (current->IsParameterValue()) {
- if (kIsDebugBuild
- && invoke->IsInvokeStaticOrDirect()
- && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) {
+ if (kIsDebugBuild &&
+ invoke->IsInvokeStaticOrDirect() &&
+ invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) {
// Ensure we do not use the last input of `invoke`, as it
// contains a clinit check which is not an actual argument.
size_t last_input_index = invoke->InputCount() - 1;
@@ -3125,6 +3137,8 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
new_pre_header, old_pre_header, /* replace_if_back_edge= */ false);
}
+// Creates a new two-basic-block loop and inserts it between the original loop header and
+// the original loop exit; also adjusts dominators and post order, and sets up new LoopInformation.
HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
HBasicBlock* body,
HBasicBlock* exit) {
@@ -3518,9 +3532,7 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
static_assert( \
static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \
"Instrinsics enumeration space overflow.");
-#include "intrinsics_list.h"
- INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
#undef CHECK_INTRINSICS_ENUM_VALUES
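
The change above (and the matching ones below) switches from including intrinsics_list.h at each use site to the ART_INTRINSICS_LIST X-macro brought into scope by the new include at the top of the file. A generic illustration of the X-macro pattern, with hypothetical names rather than the real ART list:

    #include <iostream>

    // Generic X-macro illustration; MY_INTRINSICS_LIST is a hypothetical list.
    #define MY_INTRINSICS_LIST(V) \
      V(MathAbs)                  \
      V(StringLength)

    enum class Intrinsics {
    #define DEFINE_ENUM_VALUE(Name) k##Name,
      MY_INTRINSICS_LIST(DEFINE_ENUM_VALUE)
    #undef DEFINE_ENUM_VALUE
    };

    const char* IntrinsicName(Intrinsics intrinsic) {
      switch (intrinsic) {
    #define NAME_CASE(Name) case Intrinsics::k##Name: return #Name;
        MY_INTRINSICS_LIST(NAME_CASE)
    #undef NAME_CASE
      }
      return "unknown";
    }

    int main() {
      std::cout << IntrinsicName(Intrinsics::kStringLength) << "\n";  // prints StringLength
      return 0;
    }
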
// Function that returns whether an intrinsic needs an environment or not.
@@ -3531,9 +3543,7 @@ static inline IntrinsicNeedsEnvironment NeedsEnvironmentIntrinsic(Intrinsics i)
#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return NeedsEnv;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return kNeedsEnvironment;
@@ -3547,9 +3557,7 @@ static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) {
#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return SideEffects;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return kAllSideEffects;
@@ -3563,9 +3571,7 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) {
#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return Exceptions;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return kCanThrow;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 28112d176a..9cf52183b8 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -403,7 +403,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
has_bounds_checks_(false),
has_try_catch_(false),
has_monitor_operations_(false),
- has_simd_(false),
+ has_traditional_simd_(false),
+ has_predicated_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
has_direct_critical_native_call_(false),
@@ -708,8 +709,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasMonitorOperations() const { return has_monitor_operations_; }
void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; }
- bool HasSIMD() const { return has_simd_; }
- void SetHasSIMD(bool value) { has_simd_ = value; }
+ bool HasTraditionalSIMD() { return has_traditional_simd_; }
+ void SetHasTraditionalSIMD(bool value) { has_traditional_simd_ = value; }
+
+ bool HasPredicatedSIMD() { return has_predicated_simd_; }
+ void SetHasPredicatedSIMD(bool value) { has_predicated_simd_ = value; }
+
+ bool HasSIMD() const { return has_traditional_simd_ || has_predicated_simd_; }
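
A small standalone model of the flag split above (not ART code): the two SIMD flavours are tracked separately while HasSIMD() stays the union, so existing "are wide registers live?" checks keep working unchanged.

    #include <cassert>

    // Toy model of the split has_traditional_simd_/has_predicated_simd_ flags; not ART code.
    struct GraphSimdFlags {
      bool has_traditional_simd = false;
      bool has_predicated_simd = false;
      bool HasSIMD() const { return has_traditional_simd || has_predicated_simd; }
    };

    int main() {
      GraphSimdFlags flags;
      flags.has_predicated_simd = true;  // e.g. a predicated (SVE-style) loop was vectorized
      assert(flags.HasSIMD());
      assert(!flags.has_traditional_simd);
      return 0;
    }
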
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -822,10 +828,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// DexRegisterMap to be present to allow deadlock analysis for non-debuggable code.
bool has_monitor_operations_;
- // Flag whether SIMD instructions appear in the graph. If true, the
- // code generators may have to be more careful spilling the wider
+ // Flags whether SIMD (traditional or predicated) instructions appear in the graph.
+ // If either is true, the code generators may have to be more careful spilling the wider
// contents of SIMD registers.
- bool has_simd_;
+ bool has_traditional_simd_;
+ bool has_predicated_simd_;
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false.
@@ -1636,7 +1643,9 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecStore, VecMemoryOperation) \
M(VecPredSetAll, VecPredSetOperation) \
M(VecPredWhile, VecPredSetOperation) \
- M(VecPredCondition, VecOperation) \
+ M(VecPredToBoolean, VecOperation) \
+ M(VecCondition, VecPredSetOperation) \
+ M(VecPredNot, VecPredSetOperation) \
#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \
@@ -1659,6 +1668,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
+#define FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(M)
+
#ifndef ART_ENABLE_CODEGEN_x86
#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)
#else
@@ -1715,7 +1726,7 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
const char* DebugName() const override { return #type; } \
HInstruction* Clone(ArenaAllocator* arena) const override { \
DCHECK(IsClonable()); \
- return new (arena) H##type(*this->As##type()); \
+ return new (arena) H##type(*this); \
} \
void Accept(HGraphVisitor* visitor) override
@@ -2062,12 +2073,12 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
ArtMethod* method,
uint32_t dex_pc,
HInstruction* holder)
- : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)),
- locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)),
- parent_(nullptr),
- method_(method),
- dex_pc_(dex_pc),
- holder_(holder) {
+ : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)),
+ locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)),
+ parent_(nullptr),
+ method_(method),
+ dex_pc_(dex_pc),
+ holder_(holder) {
}
ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator,
@@ -2183,9 +2194,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
std::ostream& operator<<(std::ostream& os, const HInstruction& rhs);
// Iterates over the Environments
-class HEnvironmentIterator : public ValueObject,
- public std::iterator<std::forward_iterator_tag, HEnvironment*> {
+class HEnvironmentIterator : public ValueObject {
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = HEnvironment*;
+ using difference_type = ptrdiff_t;
+ using pointer = void;
+ using reference = void;
+
explicit HEnvironmentIterator(HEnvironment* cur) : cur_(cur) {}
HEnvironment* operator*() const {
@@ -2355,9 +2371,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return true;
}
- virtual bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const {
- return false;
- }
+ virtual bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const { return false; }
// If this instruction will do an implicit null check, return the `HNullCheck` associated
// with it. Otherwise return null.
@@ -2553,7 +2567,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
#define INSTRUCTION_TYPE_CAST(type, super) \
const H##type* As##type() const; \
- H##type* As##type();
+ H##type* As##type(); \
+ const H##type* As##type##OrNull() const; \
+ H##type* As##type##OrNull();
FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST)
#undef INSTRUCTION_TYPE_CAST
@@ -2568,7 +2584,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
//
// Note: HEnvironment and some other fields are not copied and are set to default values, see
// 'explicit HInstruction(const HInstruction& other)' for details.
- virtual HInstruction* Clone(ArenaAllocator* arena ATTRIBUTE_UNUSED) const {
+ virtual HInstruction* Clone([[maybe_unused]] ArenaAllocator* arena) const {
LOG(FATAL) << "Cloning is not implemented for the instruction " <<
DebugName() << " " << GetId();
UNREACHABLE();
@@ -2596,7 +2612,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// Returns whether any data encoded in the two instructions is equal.
// This method does not look at the inputs. Both instructions must be
// of the same type, otherwise the method has undefined behavior.
- virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
+ virtual bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const {
return false;
}
@@ -2729,7 +2745,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
private:
using InstructionKindField =
- BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>;
+ BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>;
void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) {
auto before_use_node = uses_.before_begin();
@@ -2904,9 +2920,14 @@ class HBackwardInstructionIterator : public ValueObject {
};
template <typename InnerIter>
-struct HSTLInstructionIterator : public ValueObject,
- public std::iterator<std::forward_iterator_tag, HInstruction*> {
+struct HSTLInstructionIterator : public ValueObject {
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = HInstruction*;
+ using difference_type = ptrdiff_t;
+ using pointer = void;
+ using reference = void;
+
static_assert(std::is_same_v<InnerIter, HBackwardInstructionIterator> ||
std::is_same_v<InnerIter, HInstructionIterator> ||
std::is_same_v<InnerIter, HInstructionIteratorHandleChanges>,
@@ -3164,7 +3185,7 @@ class HPhi final : public HVariableInputSizeInstruction {
bool IsVRegEquivalentOf(const HInstruction* other) const {
return other != nullptr
&& other->IsPhi()
- && other->AsPhi()->GetBlock() == GetBlock()
+ && other->GetBlock() == GetBlock()
&& other->AsPhi()->GetRegNumber() == GetRegNumber();
}
@@ -3270,7 +3291,7 @@ class HConstant : public HExpression<0> {
class HNullConstant final : public HConstant {
public:
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -3639,7 +3660,8 @@ class HDeoptimize final : public HVariableInputSizeInstruction {
bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); }
bool InstructionDataEquals(const HInstruction* other) const override {
- return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
+ return (other->CanBeMoved() == CanBeMoved()) &&
+ (other->AsDeoptimize()->GetDeoptimizationKind() == GetDeoptimizationKind());
}
bool NeedsEnvironment() const override { return true; }
@@ -3827,7 +3849,7 @@ class HUnaryOperation : public HExpression<1> {
DataType::Type GetResultType() const { return GetType(); }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -3836,6 +3858,9 @@ class HUnaryOperation : public HExpression<1> {
// be evaluated as a constant, return null.
HConstant* TryStaticEvaluation() const;
+ // Same but for `input` instead of GetInput().
+ HConstant* TryStaticEvaluation(HInstruction* input) const;
+
// Apply this operation to `x`.
virtual HConstant* Evaluate(HIntConstant* x) const = 0;
virtual HConstant* Evaluate(HLongConstant* x) const = 0;
@@ -3903,7 +3928,7 @@ class HBinaryOperation : public HExpression<2> {
}
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -3912,16 +3937,19 @@ class HBinaryOperation : public HExpression<2> {
// be evaluated as a constant, return null.
HConstant* TryStaticEvaluation() const;
+ // Same but for `left` and `right` instead of GetLeft() and GetRight().
+ HConstant* TryStaticEvaluation(HInstruction* left, HInstruction* right) const;
+
// Apply this operation to `x` and `y`.
- virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const {
+ virtual HConstant* Evaluate([[maybe_unused]] HNullConstant* x,
+ [[maybe_unused]] HNullConstant* y) const {
LOG(FATAL) << DebugName() << " is not defined for the (null, null) case.";
UNREACHABLE();
}
virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0;
virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0;
- virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED,
- HIntConstant* y ATTRIBUTE_UNUSED) const {
+ virtual HConstant* Evaluate([[maybe_unused]] HLongConstant* x,
+ [[maybe_unused]] HIntConstant* y) const {
LOG(FATAL) << DebugName() << " is not defined for the (long, int) case.";
UNREACHABLE();
}
@@ -4049,8 +4077,8 @@ class HEqual final : public HCondition {
bool IsCommutative() const override { return true; }
- HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HNullConstant* x,
+ [[maybe_unused]] HNullConstant* y) const override {
return MakeConstantCondition(true, GetDexPc());
}
HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
@@ -4096,8 +4124,8 @@ class HNotEqual final : public HCondition {
bool IsCommutative() const override { return true; }
- HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HNullConstant* x,
+ [[maybe_unused]] HNullConstant* y) const override {
return MakeConstantCondition(false, GetDexPc());
}
HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
@@ -4303,13 +4331,13 @@ class HBelow final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4345,13 +4373,13 @@ class HBelowOrEqual final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4387,13 +4415,13 @@ class HAbove final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4429,13 +4457,13 @@ class HAboveOrEqual final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4522,7 +4550,7 @@ class HCompare final : public HBinaryOperation {
return GetBias() == ComparisonBias::kGtBias;
}
- static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type type ATTRIBUTE_UNUSED) {
+ static SideEffects SideEffectsForArchRuntimeCalls([[maybe_unused]] DataType::Type type) {
// Comparisons do not require a runtime call in any back end.
return SideEffects::None();
}
@@ -4859,8 +4887,7 @@ class HInvokePolymorphic final : public HInvoke {
// to pass intrinsic information to the HInvokePolymorphic node.
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- dex::ProtoIndex proto_idx,
- bool enable_intrinsic_opt)
+ dex::ProtoIndex proto_idx)
: HInvoke(kInvokePolymorphic,
allocator,
number_of_arguments,
@@ -4871,9 +4898,8 @@ class HInvokePolymorphic final : public HInvoke {
resolved_method,
resolved_method_reference,
kPolymorphic,
- enable_intrinsic_opt),
- proto_idx_(proto_idx) {
- }
+ /* enable_intrinsic_opt= */ true),
+ proto_idx_(proto_idx) {}
bool IsClonable() const override { return true; }
@@ -5015,7 +5041,7 @@ class HInvokeStaticOrDirect final : public HInvoke {
return input_records;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
+ bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override {
// We do not access the method via object reference, so we cannot do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
return false;
@@ -5599,10 +5625,14 @@ class HMin final : public HBinaryOperation {
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
// TODO: Evaluation for floating-point values.
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
+ return nullptr;
+ }
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
+ return nullptr;
+ }
DECLARE_INSTRUCTION(Min);
@@ -5634,10 +5664,14 @@ class HMax final : public HBinaryOperation {
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
// TODO: Evaluation for floating-point values.
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
+ return nullptr;
+ }
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
+ return nullptr;
+ }
DECLARE_INSTRUCTION(Max);
@@ -5699,7 +5733,7 @@ class HDivZeroCheck final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -5736,18 +5770,18 @@ class HShl final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5782,18 +5816,18 @@ class HShr final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5830,18 +5864,18 @@ class HUShr final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5873,13 +5907,13 @@ class HAnd final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5911,13 +5945,13 @@ class HOr final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5949,13 +5983,13 @@ class HXor final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5993,18 +6027,18 @@ class HRor final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -6067,7 +6101,7 @@ class HNot final : public HUnaryOperation {
}
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -6079,11 +6113,11 @@ class HNot final : public HUnaryOperation {
HConstant* Evaluate(HLongConstant* x) const override {
return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -6101,7 +6135,7 @@ class HBooleanNot final : public HUnaryOperation {
}
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -6113,15 +6147,15 @@ class HBooleanNot final : public HUnaryOperation {
HConstant* Evaluate(HIntConstant* x) const override {
return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for long values";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -6148,7 +6182,7 @@ class HTypeConversion final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
// Return whether the conversion is implicit. This includes conversion to the same type.
@@ -6160,6 +6194,9 @@ class HTypeConversion final : public HExpression<1> {
// containing the result. If the input cannot be converted, return nullptr.
HConstant* TryStaticEvaluation() const;
+ // Same but for `input` instead of GetInput().
+ HConstant* TryStaticEvaluation(HInstruction* input) const;
+
DECLARE_INSTRUCTION(TypeConversion);
protected:
@@ -6180,7 +6217,7 @@ class HNullCheck final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -6511,12 +6548,12 @@ class HArrayGet final : public HExpression<2> {
HInstruction* index,
DataType::Type type,
uint32_t dex_pc)
- : HArrayGet(array,
- index,
- type,
- SideEffects::ArrayReadOfType(type),
- dex_pc,
- /* is_string_char_at= */ false) {
+ : HArrayGet(array,
+ index,
+ type,
+ SideEffects::ArrayReadOfType(type),
+ dex_pc,
+ /* is_string_char_at= */ false) {
}
HArrayGet(HInstruction* array,
@@ -6533,10 +6570,10 @@ class HArrayGet final : public HExpression<2> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
+ bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override {
// TODO: We can be smarter here.
// Currently, unless the array is the result of NewArray, the array access is always
// preceded by some form of null NullCheck necessary for the bounds check, usually
@@ -6640,7 +6677,7 @@ class HArraySet final : public HExpression<3> {
// Can throw ArrayStoreException.
bool CanThrow() const override { return NeedsTypeCheck(); }
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
+ bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override {
// TODO: Same as for ArrayGet.
return false;
}
@@ -6746,7 +6783,7 @@ class HArrayLength final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
@@ -6790,7 +6827,7 @@ class HBoundsCheck final : public HExpression<2> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -7000,17 +7037,15 @@ class HLoadClass final : public HInstruction {
bool CanCallRuntime() const {
return NeedsAccessCheck() ||
MustGenerateClinitCheck() ||
- GetLoadKind() == LoadKind::kRuntimeCall ||
- GetLoadKind() == LoadKind::kBssEntry;
+ NeedsBss() ||
+ GetLoadKind() == LoadKind::kRuntimeCall;
}
bool CanThrow() const override {
return NeedsAccessCheck() ||
MustGenerateClinitCheck() ||
// If the class is in the boot image, the lookup in the runtime call cannot throw.
- ((GetLoadKind() == LoadKind::kRuntimeCall ||
- GetLoadKind() == LoadKind::kBssEntry) &&
- !IsInBootImage());
+ ((GetLoadKind() == LoadKind::kRuntimeCall || NeedsBss()) && !IsInBootImage());
}
ReferenceTypeInfo GetLoadedClassRTI() {
@@ -7423,7 +7458,7 @@ class HClinitCheck final : public HExpression<1> {
}
// TODO: Make ClinitCheck clonable.
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -8343,7 +8378,7 @@ class HSelect final : public HExpression<3> {
HInstruction* GetCondition() const { return InputAt(2); }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -8351,6 +8386,12 @@ class HSelect final : public HExpression<3> {
return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
}
+ void UpdateType() {
+ DCHECK_EQ(HPhi::ToPhiType(GetTrueValue()->GetType()),
+ HPhi::ToPhiType(GetFalseValue()->GetType()));
+ SetPackedField<TypeField>(HPhi::ToPhiType(GetTrueValue()->GetType()));
+ }
+
DECLARE_INSTRUCTION(Select);
protected:
@@ -8513,7 +8554,7 @@ class HIntermediateAddress final : public HExpression<2> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
bool IsActualObject() const override { return false; }
@@ -8550,7 +8591,7 @@ class HGraphVisitor : public ValueObject {
graph_(graph) {}
virtual ~HGraphVisitor() {}
- virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {}
+ virtual void VisitInstruction([[maybe_unused]] HInstruction* instruction) {}
virtual void VisitBasicBlock(HBasicBlock* block);
// Visit the graph following basic block insertion order.
@@ -8623,7 +8664,7 @@ class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor {
DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
};
-// Iterator over the blocks that art part of the loop. Includes blocks part
+// Iterator over the blocks that are part of the loop; includes blocks which are part
// of an inner loop. The order in which the blocks are iterated is on their
// block id.
class HBlocksInLoopIterator : public ValueObject {
@@ -8656,7 +8697,7 @@ class HBlocksInLoopIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator);
};
-// Iterator over the blocks that art part of the loop. Includes blocks part
+// Iterator over the blocks that are part of the loop; includes blocks which are part
// of an inner loop. The order in which the blocks are iterated is reverse
// post order.
class HBlocksInLoopReversePostOrderIterator : public ValueObject {
@@ -8689,6 +8730,39 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator);
};
+// Iterator over the blocks that are part of the loop; includes blocks which are part
+// of an inner loop. The order in which the blocks are iterated is post order.
+class HBlocksInLoopPostOrderIterator : public ValueObject {
+ public:
+ explicit HBlocksInLoopPostOrderIterator(const HLoopInformation& info)
+ : blocks_in_loop_(info.GetBlocks()),
+ blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()),
+ index_(blocks_.size() - 1) {
+ if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) {
+ Advance();
+ }
+ }
+
+ bool Done() const { return index_ < 0; }
+ HBasicBlock* Current() const { return blocks_[index_]; }
+ void Advance() {
+ --index_;
+ for (; index_ >= 0; --index_) {
+ if (blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) {
+ break;
+ }
+ }
+ }
+
+ private:
+ const BitVector& blocks_in_loop_;
+ const ArenaVector<HBasicBlock*>& blocks_;
+
+ int32_t index_;
+
+ DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopPostOrderIterator);
+};
+
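
The new iterator above reuses the graph's reverse-post-order list and simply walks it backwards while skipping blocks outside the loop; walking a reverse post order backwards yields a post order. A standalone sketch of the same traversal (not ART code):

    #include <iostream>
    #include <set>
    #include <vector>

    // Standalone model of a blocks-in-loop post-order walk; not ART code.
    int main() {
      std::vector<int> reverse_post_order = {0, 1, 2, 3, 4};  // block ids
      std::set<int> blocks_in_loop = {1, 2, 3};

      for (int index = static_cast<int>(reverse_post_order.size()) - 1; index >= 0; --index) {
        int block_id = reverse_post_order[index];
        if (blocks_in_loop.count(block_id) == 0) {
          continue;  // skip blocks that are not part of the loop
        }
        std::cout << block_id << " ";  // prints: 3 2 1
      }
      std::cout << "\n";
      return 0;
    }
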
// Returns int64_t value of a properly typed constant.
inline int64_t Int64FromConstant(HConstant* constant) {
if (constant->IsIntConstant()) {
@@ -8752,10 +8826,18 @@ inline bool IsZeroBitPattern(HInstruction* instruction) {
#define INSTRUCTION_TYPE_CAST(type, super) \
inline const H##type* HInstruction::As##type() const { \
- return Is##type() ? down_cast<const H##type*>(this) : nullptr; \
+ DCHECK(Is##type()); \
+ return down_cast<const H##type*>(this); \
} \
inline H##type* HInstruction::As##type() { \
- return Is##type() ? static_cast<H##type*>(this) : nullptr; \
+ DCHECK(Is##type()); \
+ return down_cast<H##type*>(this); \
+ } \
+ inline const H##type* HInstruction::As##type##OrNull() const { \
+ return Is##type() ? down_cast<const H##type*>(this) : nullptr; \
+ } \
+ inline H##type* HInstruction::As##type##OrNull() { \
+ return Is##type() ? down_cast<H##type*>(this) : nullptr; \
}
FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST)
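
With the macro change above, As##type() now asserts the dynamic type while the new As##type##OrNull() preserves the old probing behaviour. A minimal model of the distinction (not ART code):

    #include <cassert>

    // Minimal model of the As<Type>() vs As<Type>OrNull() split; not ART code.
    struct Instruction {
      virtual ~Instruction() = default;
      virtual bool IsAdd() const { return false; }
    };
    struct AddInstruction : Instruction {
      bool IsAdd() const override { return true; }
    };

    AddInstruction* AsAdd(Instruction* insn) {
      assert(insn->IsAdd());  // checked cast: caller must already know the type
      return static_cast<AddInstruction*>(insn);
    }
    AddInstruction* AsAddOrNull(Instruction* insn) {
      return insn->IsAdd() ? static_cast<AddInstruction*>(insn) : nullptr;  // probing cast
    }

    int main() {
      AddInstruction add;
      Instruction other;
      assert(AsAdd(&add) != nullptr);
      assert(AsAddOrNull(&other) == nullptr);
      return 0;
    }
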
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 27e610328f..4b0187d536 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -105,13 +105,13 @@ class HBitwiseNegatedRight final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -160,7 +160,7 @@ class HIntermediateAddressIndex final : public HExpression<3> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
bool IsActualObject() const override { return false; }
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 73f6c40a0d..6a60d6be01 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -1384,8 +1384,8 @@ class HVecPredWhile final : public HVecPredSetOperation {
static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits;
static constexpr size_t kCondKindSize =
MinimumBitsToStore(static_cast<size_t>(CondKind::kLast));
- static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize;
- static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits,
+ static constexpr size_t kNumberOfVecPredWhilePackedBits = kCondKind + kCondKindSize;
+ static_assert(kNumberOfVecPredWhilePackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
using CondKindField = BitField<CondKind, kCondKind, kCondKindSize>;
@@ -1395,13 +1395,13 @@ class HVecPredWhile final : public HVecPredSetOperation {
// Evaluates the predicate condition (PCondKind) for a vector predicate; outputs
// a scalar boolean value result.
//
-// Note: as VecPredCondition can be also predicated, only active elements (determined by the
+// Note: as VecPredToBoolean can be also predicated, only active elements (determined by the
// instruction's governing predicate) of the input vector predicate are used for condition
// evaluation.
//
// Note: this instruction is currently used as a workaround for the fact that IR instructions
// can't have more than one output.
-class HVecPredCondition final : public HVecOperation {
+class HVecPredToBoolean final : public HVecOperation {
public:
// To get more info on the condition kinds please see "2.2 Process state, PSTATE" section of
// "ARM Architecture Reference Manual Supplement. The Scalable Vector Extension (SVE),
@@ -1418,13 +1418,13 @@ class HVecPredCondition final : public HVecOperation {
kEnumLast = kPLast
};
- HVecPredCondition(ArenaAllocator* allocator,
+ HVecPredToBoolean(ArenaAllocator* allocator,
HInstruction* input,
PCondKind pred_cond,
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(kVecPredCondition,
+ : HVecOperation(kVecPredToBoolean,
allocator,
packed_type,
SideEffects::None(),
@@ -1447,19 +1447,86 @@ class HVecPredCondition final : public HVecOperation {
return GetPackedField<CondKindField>();
}
- DECLARE_INSTRUCTION(VecPredCondition);
+ DECLARE_INSTRUCTION(VecPredToBoolean);
protected:
// Additional packed bits.
static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits;
static constexpr size_t kCondKindSize =
MinimumBitsToStore(static_cast<size_t>(PCondKind::kEnumLast));
- static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize;
- static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits,
+ static constexpr size_t kNumberOfVecPredToBooleanPackedBits = kCondKind + kCondKindSize;
+ static_assert(kNumberOfVecPredToBooleanPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
using CondKindField = BitField<PCondKind, kCondKind, kCondKindSize>;
- DEFAULT_COPY_CONSTRUCTOR(VecPredCondition);
+ DEFAULT_COPY_CONSTRUCTOR(VecPredToBoolean);
+};
+
+// Evaluates condition for pairwise elements in two input vectors and sets the result
+// as an output predicate vector.
+//
+// viz. [ p1, .. , pn ] = [ x1 OP y1 , x2 OP y2, .. , xn OP yn] where OP is CondKind
+// condition.
+//
+// Currently only kEqual is supported by this vector instruction; we don't even define
+// the kCondType here.
+// TODO: support other condition ops.
+class HVecCondition final : public HVecPredSetOperation {
+ public:
+ HVecCondition(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
+ HVecPredSetOperation(kVecCondition,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 2,
+ vector_length,
+ dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK(!left->IsVecPredSetOperation());
+ DCHECK(right->IsVecOperation());
+ DCHECK(!right->IsVecPredSetOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecCondition);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecCondition);
+};
+
+// Inverts every component in the predicate vector.
+//
+// viz. [ p1, .. , pn ] = [ !px1 , !px2 , .. , !pxn ].
+class HVecPredNot final : public HVecPredSetOperation {
+ public:
+ HVecPredNot(ArenaAllocator* allocator,
+ HInstruction* input,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
+ HVecPredSetOperation(kVecPredNot,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 1,
+ vector_length,
+ dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK(input->IsVecPredSetOperation());
+
+ SetRawInputAt(0, input);
+ }
+
+ DECLARE_INSTRUCTION(VecPredNot);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecPredNot);
};
} // namespace art
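As a rough illustration of how these new nodes fit together, the sketch below builds an HVecCondition from two existing vector inputs and then negates the resulting predicate with HVecPredNot. It follows the constructor signatures declared above, but `allocator`, `block`, `cursor`, `vec_a` and `vec_b` are hypothetical names standing in for a vectorizer's real state, so treat this as a sketch rather than code from this change.

// Sketch only: `allocator`, `block`, `cursor`, `vec_a` and `vec_b` are assumed to exist.
HVecCondition* cond = new (allocator) HVecCondition(allocator,
                                                    vec_a,
                                                    vec_b,
                                                    DataType::Type::kInt32,
                                                    /* vector_length= */ 4,
                                                    kNoDexPc);
block->InsertInstructionBefore(cond, cursor);
// The inverted predicate can then govern the complementary lanes, e.g. the "else" side
// of a vectorized branch.
HVecPredNot* not_cond = new (allocator) HVecPredNot(allocator,
                                                    cond,
                                                    DataType::Type::kInt32,
                                                    /* vector_length= */ 4,
                                                    kNoDexPc);
block->InsertInstructionBefore(not_cond, cursor);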
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index e246390aa5..14d9823355 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -149,13 +149,13 @@ class HX86AndNot final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -196,11 +196,11 @@ class HX86MaskOrResetLeastSetBit final : public HUnaryOperation {
HConstant* Evaluate(HLongConstant* x) const override {
return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override {
LOG(FATAL) << DebugName() << "is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override {
LOG(FATAL) << DebugName() << "is not defined for double values";
UNREACHABLE();
}
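The hunks above (and several below) replace ART's ATTRIBUTE_UNUSED macro with the standard C++17 attribute. A minimal standalone example, unrelated to any particular ART signature, shows the intent:

// [[maybe_unused]] silences unused-parameter warnings without a project-specific macro.
int EvaluateStub([[maybe_unused]] int x, int y) {
  return y * 2;  // `x` is intentionally ignored in this overload.
}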
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 12e9a1046d..4f20b55c7e 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -313,8 +313,8 @@ ArenaVector<HOptimization*> ConstructOptimizations(
opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
break;
case OptimizationPass::kInstructionSimplifierX86:
- opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
- break;
+ opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
+ break;
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case OptimizationPass::kInstructionSimplifierX86_64:
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index f12e748941..632c32a70b 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -89,7 +89,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
void Finish() {
code_gen_->GenerateFrameExit();
- code_gen_->Finalize(&code_allocator_);
+ code_gen_->Finalize();
}
void Check(InstructionSet isa,
@@ -97,7 +97,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
const std::vector<uint8_t>& expected_asm,
const std::vector<uint8_t>& expected_cfi) {
// Get the outputs.
- ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory();
+ ArrayRef<const uint8_t> actual_asm = code_gen_->GetCode();
Assembler* opt_asm = code_gen_->GetAssembler();
ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data()));
@@ -123,27 +123,9 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
}
private:
- class InternalCodeAllocator : public CodeAllocator {
- public:
- InternalCodeAllocator() {}
-
- uint8_t* Allocate(size_t size) override {
- memory_.resize(size);
- return memory_.data();
- }
-
- ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); }
-
- private:
- std::vector<uint8_t> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
- };
-
HGraph* graph_;
std::unique_ptr<CodeGenerator> code_gen_;
ArenaVector<HBasicBlock*> blocks_;
- InternalCodeAllocator code_allocator_;
};
#define TEST_ISA(isa) \
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 00eb6e5c42..040c2449a7 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -69,28 +69,6 @@ static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
static constexpr const char* kPassNameSeparator = "$";
/**
- * Used by the code generator, to allocate the code in a vector.
- */
-class CodeVectorAllocator final : public CodeAllocator {
- public:
- explicit CodeVectorAllocator(ArenaAllocator* allocator)
- : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {}
-
- uint8_t* Allocate(size_t size) override {
- memory_.resize(size);
- return &memory_[0];
- }
-
- ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); }
- uint8_t* GetData() { return memory_.data(); }
-
- private:
- ArenaVector<uint8_t> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
-};
-
-/**
* Filter to apply to the visualizer. Methods whose name contain that filter will
* be dumped.
*/
@@ -361,7 +339,6 @@ class OptimizingCompiler final : public Compiler {
// Create a 'CompiledMethod' for an optimized graph.
CompiledMethod* Emit(ArenaAllocator* allocator,
- CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
bool is_intrinsic,
const dex::CodeItem* item) const;
@@ -372,10 +349,8 @@ class OptimizingCompiler final : public Compiler {
// 1) Builds the graph. Returns null if it failed to build it.
// 2) Transforms the graph to SSA. Returns null if it failed.
// 3) Runs optimizations on the graph, including register allocator.
- // 4) Generates code with the `code_allocator` provided.
CodeGenerator* TryCompile(ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
CompilationKind compilation_kind,
@@ -383,7 +358,6 @@ class OptimizingCompiler final : public Compiler {
CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
VariableSizedHandleScope* handles) const;
@@ -440,24 +414,33 @@ void OptimizingCompiler::DumpInstructionSetFeaturesToCfg() const {
std::string isa_string =
std::string("isa:") + GetInstructionSetString(features->GetInstructionSet());
std::string features_string = "isa_features:" + features->GetFeatureString();
+ std::string read_barrier_type = "none";
+ if (gUseReadBarrier) {
+   if (art::kUseBakerReadBarrier) {
+     read_barrier_type = "baker";
+   } else if (art::kUseTableLookupReadBarrier) {
+     read_barrier_type = "tablelookup";
+   }
+ }
+ std::string read_barrier_string = ART_FORMAT("read_barrier_type:{}", read_barrier_type);
// It is assumed that visualizer_output_ is empty when calling this function, hence the fake
// compilation block containing the ISA features will be printed at the beginning of the .cfg
// file.
- *visualizer_output_
- << HGraphVisualizer::InsertMetaDataAsCompilationBlock(isa_string + ' ' + features_string);
+ *visualizer_output_ << HGraphVisualizer::InsertMetaDataAsCompilationBlock(
+ isa_string + ' ' + features_string + ' ' + read_barrier_string);
}
-bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
- const DexFile& dex_file ATTRIBUTE_UNUSED) const {
+bool OptimizingCompiler::CanCompileMethod([[maybe_unused]] uint32_t method_idx,
+ [[maybe_unused]] const DexFile& dex_file) const {
return true;
}
static bool IsInstructionSetSupported(InstructionSet instruction_set) {
- return instruction_set == InstructionSet::kArm
- || instruction_set == InstructionSet::kArm64
- || instruction_set == InstructionSet::kThumb2
- || instruction_set == InstructionSet::kX86
- || instruction_set == InstructionSet::kX86_64;
+ return instruction_set == InstructionSet::kArm ||
+ instruction_set == InstructionSet::kArm64 ||
+ instruction_set == InstructionSet::kThumb2 ||
+ instruction_set == InstructionSet::kRiscv64 ||
+ instruction_set == InstructionSet::kX86 ||
+ instruction_set == InstructionSet::kX86_64;
}
bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
@@ -469,7 +452,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
OptimizationDef arm_optimizations[] = {
- OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
+ OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
};
return RunOptimizations(graph,
codegen,
@@ -481,7 +464,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
OptimizationDef x86_optimizations[] = {
- OptDef(OptimizationPass::kPcRelativeFixupsX86),
+ OptDef(OptimizationPass::kPcRelativeFixupsX86),
};
return RunOptimizations(graph,
codegen,
@@ -508,11 +491,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
OptimizationDef arm_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierArm),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
- OptDef(OptimizationPass::kScheduling)
+ OptDef(OptimizationPass::kInstructionSimplifierArm),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
+ OptDef(OptimizationPass::kScheduling)
};
return RunOptimizations(graph,
codegen,
@@ -524,10 +507,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_arm64
case InstructionSet::kArm64: {
OptimizationDef arm64_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierArm64),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kScheduling)
+ OptDef(OptimizationPass::kInstructionSimplifierArm64),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kScheduling)
};
return RunOptimizations(graph,
codegen,
@@ -539,11 +522,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
OptimizationDef x86_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierX86),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kPcRelativeFixupsX86),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration)
+ OptDef(OptimizationPass::kInstructionSimplifierX86),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kPcRelativeFixupsX86),
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
return RunOptimizations(graph,
codegen,
@@ -555,10 +538,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_x86_64
case InstructionSet::kX86_64: {
OptimizationDef x86_64_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierX86_64),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration)
+ OptDef(OptimizationPass::kInstructionSimplifierX86_64),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
return RunOptimizations(graph,
codegen,
@@ -633,68 +616,68 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
}
OptimizationDef optimizations[] = {
- // Initial optimizations.
- OptDef(OptimizationPass::kConstantFolding),
- OptDef(OptimizationPass::kInstructionSimplifier),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$initial"),
- // Inlining.
- OptDef(OptimizationPass::kInliner),
- // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing").
- OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$after_inlining",
- OptimizationPass::kInliner),
- OptDef(OptimizationPass::kInstructionSimplifier,
- "instruction_simplifier$after_inlining",
- OptimizationPass::kInliner),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$after_inlining",
- OptimizationPass::kInliner),
- // GVN.
- OptDef(OptimizationPass::kSideEffectsAnalysis,
- "side_effects$before_gvn"),
- OptDef(OptimizationPass::kGlobalValueNumbering),
- // Simplification (TODO: only if GVN occurred).
- OptDef(OptimizationPass::kSelectGenerator),
- OptDef(OptimizationPass::kAggressiveConstantFolding,
- "constant_folding$after_gvn"),
- OptDef(OptimizationPass::kInstructionSimplifier,
- "instruction_simplifier$after_gvn"),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$after_gvn"),
- // High-level optimizations.
- OptDef(OptimizationPass::kSideEffectsAnalysis,
- "side_effects$before_licm"),
- OptDef(OptimizationPass::kInvariantCodeMotion),
- OptDef(OptimizationPass::kInductionVarAnalysis),
- OptDef(OptimizationPass::kBoundsCheckElimination),
- OptDef(OptimizationPass::kLoopOptimization),
- // Simplification.
- OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$after_loop_opt"),
- OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
- "instruction_simplifier$after_loop_opt"),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$after_loop_opt"),
- // Other high-level optimizations.
- OptDef(OptimizationPass::kLoadStoreElimination),
- OptDef(OptimizationPass::kCHAGuardOptimization),
- OptDef(OptimizationPass::kCodeSinking),
- // Simplification.
- OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$before_codegen"),
- // The codegen has a few assumptions that only the instruction simplifier
- // can satisfy. For example, the code generator does not expect to see a
- // HTypeConversion from a type to the same type.
- OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
- "instruction_simplifier$before_codegen"),
- // Simplification may result in dead code that should be removed prior to
- // code generation.
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$before_codegen"),
- // Eliminate constructor fences after code sinking to avoid
- // complicated sinking logic to split a fence with many inputs.
- OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
+ // Initial optimizations.
+ OptDef(OptimizationPass::kConstantFolding),
+ OptDef(OptimizationPass::kInstructionSimplifier),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$initial"),
+ // Inlining.
+ OptDef(OptimizationPass::kInliner),
+ // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing").
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_inlining",
+ OptimizationPass::kInliner),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_inlining",
+ OptimizationPass::kInliner),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_inlining",
+ OptimizationPass::kInliner),
+ // GVN.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_gvn"),
+ OptDef(OptimizationPass::kGlobalValueNumbering),
+ // Simplification (TODO: only if GVN occurred).
+ OptDef(OptimizationPass::kSelectGenerator),
+ OptDef(OptimizationPass::kAggressiveConstantFolding,
+ "constant_folding$after_gvn"),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_gvn"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_gvn"),
+ // High-level optimizations.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_licm"),
+ OptDef(OptimizationPass::kInvariantCodeMotion),
+ OptDef(OptimizationPass::kInductionVarAnalysis),
+ OptDef(OptimizationPass::kBoundsCheckElimination),
+ OptDef(OptimizationPass::kLoopOptimization),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_loop_opt"),
+ OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
+ "instruction_simplifier$after_loop_opt"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_loop_opt"),
+ // Other high-level optimizations.
+ OptDef(OptimizationPass::kLoadStoreElimination),
+ OptDef(OptimizationPass::kCHAGuardOptimization),
+ OptDef(OptimizationPass::kCodeSinking),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$before_codegen"),
+ // The codegen has a few assumptions that only the instruction simplifier
+ // can satisfy. For example, the code generator does not expect to see a
+ // HTypeConversion from a type to the same type.
+ OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
+ "instruction_simplifier$before_codegen"),
+ // Simplification may result in dead code that should be removed prior to
+ // code generation.
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$before_codegen"),
+ // Eliminate constructor fences after code sinking to avoid
+ // complicated sinking logic to split a fence with many inputs.
+ OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
};
RunOptimizations(graph,
codegen,
@@ -719,7 +702,6 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator*
}
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
- CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
bool is_intrinsic,
const dex::CodeItem* code_item_for_osr_check) const {
@@ -729,7 +711,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CompiledCodeStorage* storage = GetCompiledCodeStorage();
CompiledMethod* compiled_method = storage->CreateCompiledMethod(
codegen->GetInstructionSet(),
- code_allocator->GetMemory(),
+ codegen->GetCode(),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const linker::LinkerPatch>(linker_patches),
@@ -747,9 +729,92 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
return compiled_method;
}
+// TODO(riscv64): Remove this check when codegen is complete.
+#ifdef ART_ENABLE_CODEGEN_riscv64
+static bool CanAssembleGraphForRiscv64(HGraph* graph) {
+ for (HBasicBlock* block : graph->GetPostOrder()) {
+ // Phis are implemented (and they have no code to emit), so check only non-Phi instructions.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ switch (it.Current()->GetKind()) {
+ case HInstruction::kParallelMove:
+ // ParallelMove is supported but it is inserted by the register allocator
+ // and this check is done before register allocation.
+ LOG(FATAL) << "Unexpected ParallelMove before register allocation!";
+ UNREACHABLE();
+ case HInstruction::kExit:
+ case HInstruction::kGoto:
+ case HInstruction::kParameterValue:
+ case HInstruction::kReturn:
+ case HInstruction::kReturnVoid:
+ case HInstruction::kSuspendCheck:
+ case HInstruction::kDoubleConstant:
+ case HInstruction::kFloatConstant:
+ case HInstruction::kIntConstant:
+ case HInstruction::kLongConstant:
+ case HInstruction::kNullConstant:
+ case HInstruction::kLoadClass:
+ case HInstruction::kLoadString:
+ case HInstruction::kLoadMethodHandle:
+ case HInstruction::kLoadMethodType:
+ case HInstruction::kInstanceFieldGet:
+ case HInstruction::kStaticFieldGet:
+ case HInstruction::kArrayGet:
+ case HInstruction::kAbove:
+ case HInstruction::kAboveOrEqual:
+ case HInstruction::kBelow:
+ case HInstruction::kBelowOrEqual:
+ case HInstruction::kEqual:
+ case HInstruction::kGreaterThan:
+ case HInstruction::kGreaterThanOrEqual:
+ case HInstruction::kLessThan:
+ case HInstruction::kLessThanOrEqual:
+ case HInstruction::kNotEqual:
+ case HInstruction::kCompare:
+ case HInstruction::kIf:
+ case HInstruction::kAdd:
+ case HInstruction::kAnd:
+ case HInstruction::kOr:
+ case HInstruction::kSub:
+ case HInstruction::kXor:
+ case HInstruction::kRor:
+ case HInstruction::kShl:
+ case HInstruction::kShr:
+ case HInstruction::kUShr:
+ case HInstruction::kAbs:
+ case HInstruction::kBooleanNot:
+ case HInstruction::kMul:
+ case HInstruction::kNeg:
+ case HInstruction::kNot:
+ case HInstruction::kMin:
+ case HInstruction::kMax:
+ case HInstruction::kInvokeVirtual:
+ case HInstruction::kInvokeInterface:
+ case HInstruction::kCurrentMethod:
+ case HInstruction::kNullCheck:
+ break;
+ case HInstruction::kInvokeStaticOrDirect:
+ if (it.Current()->AsInvokeStaticOrDirect()->GetCodePtrLocation() ==
+ CodePtrLocation::kCallCriticalNative &&
+ it.Current()->AsInvokeStaticOrDirect()->GetNumberOfArguments() >= 8u) {
+ // TODO(riscv64): If there are more than 8 FP args, some may be passed in GPRs
+ // and this requires a `CriticalNativeAbiFixupRiscv64` pass similar to the one
+ // we have for ARM. This is not yet implemented. For simplicity, we reject all
+ // direct @CriticalNative calls with 8 or more args.
+ return false;
+ }
+ break;
+ default:
+ // Unimplemented instruction.
+ return false;
+ }
+ }
+ }
+ return true;
+}
+#endif
+
CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
CompilationKind compilation_kind,
@@ -906,6 +971,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
WriteBarrierElimination(graph, compilation_stats_.get()).Run();
}
+ // TODO(riscv64): Remove this check when codegen is complete.
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ if (instruction_set == InstructionSet::kRiscv64 && !CanAssembleGraphForRiscv64(graph)) {
+ MaybeRecordStat(compilation_stats_.get(),
+ MethodCompilationStat::kNotCompiledUnsupportedIsa);
+ return nullptr;
+ }
+#endif
+
RegisterAllocator::Strategy regalloc_strategy =
compiler_options.GetRegisterAllocationStrategy();
AllocateRegisters(graph,
@@ -914,7 +988,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
regalloc_strategy,
compilation_stats_.get());
- codegen->Compile(code_allocator);
+ codegen->Compile();
pass_observer.DumpDisassembly();
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode);
@@ -924,7 +998,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
VariableSizedHandleScope* handles) const {
@@ -986,9 +1059,9 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
}
OptimizationDef optimizations[] = {
- // The codegen has a few assumptions that only the instruction simplifier
- // can satisfy.
- OptDef(OptimizationPass::kInstructionSimplifier),
+ // The codegen has a few assumptions that only the instruction simplifier
+ // can satisfy.
+ OptDef(OptimizationPass::kInstructionSimplifier),
};
RunOptimizations(graph,
codegen.get(),
@@ -1002,6 +1075,15 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
WriteBarrierElimination(graph, compilation_stats_.get()).Run();
}
+ // TODO(riscv64): Remove this check when codegen is complete.
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ if (instruction_set == InstructionSet::kRiscv64 && !CanAssembleGraphForRiscv64(graph)) {
+ MaybeRecordStat(compilation_stats_.get(),
+ MethodCompilationStat::kNotCompiledUnsupportedIsa);
+ return nullptr;
+ }
+#endif
+
AllocateRegisters(graph,
codegen.get(),
&pass_observer,
@@ -1013,7 +1095,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
return nullptr;
}
- codegen->Compile(code_allocator);
+ codegen->Compile();
pass_observer.DumpDisassembly();
VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic()
@@ -1037,7 +1119,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
DCHECK(runtime->IsAotCompiler());
ArenaAllocator allocator(runtime->GetArenaPool());
ArenaStack arena_stack(runtime->GetArenaPool());
- CodeVectorAllocator code_allocator(&allocator);
std::unique_ptr<CodeGenerator> codegen;
bool compiled_intrinsic = false;
{
@@ -1071,7 +1152,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
codegen.reset(
TryCompileIntrinsic(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
&handles));
@@ -1083,7 +1163,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
codegen.reset(
TryCompile(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
compiler_options.IsBaseline()
@@ -1094,7 +1173,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
}
if (codegen.get() != nullptr) {
compiled_method = Emit(&allocator,
- &code_allocator,
codegen.get(),
compiled_intrinsic,
compiled_intrinsic ? nullptr : code_item);
@@ -1115,7 +1193,9 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
if (kIsDebugBuild &&
compiler_options.CompileArtTest() &&
- IsInstructionSetSupported(compiler_options.GetInstructionSet())) {
+ IsInstructionSetSupported(compiler_options.GetInstructionSet()) &&
+ // TODO(riscv64): Enable this check when codegen is complete.
+ compiler_options.GetInstructionSet() != InstructionSet::kRiscv64) {
// For testing purposes, we put a special marker on method names
// that should be compiled with this compiler (when the
// instruction set is supported). This makes sure we're not
@@ -1177,19 +1257,16 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
/*verified_method=*/ nullptr,
dex_cache,
compiling_class);
- CodeVectorAllocator code_allocator(&allocator);
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
std::unique_ptr<CodeGenerator> codegen(
TryCompileIntrinsic(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
&handles));
if (codegen != nullptr) {
return Emit(&allocator,
- &code_allocator,
codegen.get(),
/*is_intrinsic=*/ true,
/*item=*/ nullptr);
@@ -1221,7 +1298,7 @@ Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
return new OptimizingCompiler(compiler_options, storage);
}
-bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
+bool EncodeArtMethodInInlineInfo([[maybe_unused]] ArtMethod* method) {
// Note: the runtime is null only for unit testing.
return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler();
}
@@ -1328,7 +1405,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
debug_info,
/* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(),
compilation_kind,
- /* has_should_deoptimize_flag= */ false,
cha_single_implementation_list)) {
code_cache->Free(self, region, reserved_code.data(), reserved_data.data());
return false;
@@ -1342,7 +1418,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
}
ArenaStack arena_stack(runtime->GetJitArenaPool());
- CodeVectorAllocator code_allocator(&allocator);
VariableSizedHandleScope handles(self);
std::unique_ptr<CodeGenerator> codegen;
@@ -1365,7 +1440,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
codegen.reset(
TryCompile(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
compilation_kind,
@@ -1381,7 +1455,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArrayRef<const uint8_t> reserved_data;
if (!code_cache->Reserve(self,
region,
- code_allocator.GetMemory().size(),
+ codegen->GetAssembler()->CodeSize(),
stack_map.size(),
/*number_of_roots=*/codegen->GetNumberOfJitRoots(),
method,
@@ -1394,7 +1468,9 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const uint8_t* roots_data = reserved_data.data();
std::vector<Handle<mirror::Object>> roots;
- codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots);
+ codegen->EmitJitRoots(const_cast<uint8_t*>(codegen->GetAssembler()->CodeBufferBaseAddress()),
+ roots_data,
+ &roots);
// The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope.
DCHECK(std::all_of(roots.begin(),
roots.end(),
@@ -1418,7 +1494,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.is_optimized = true;
info.is_code_address_text_relative = false;
info.code_address = reinterpret_cast<uintptr_t>(code);
- info.code_size = code_allocator.GetMemory().size();
+ info.code_size = codegen->GetAssembler()->CodeSize();
info.frame_size_in_bytes = codegen->GetFrameSize();
info.code_info = stack_map.size() == 0 ? nullptr : stack_map.data();
info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
@@ -1429,22 +1505,23 @@ bool OptimizingCompiler::JitCompile(Thread* self,
region,
method,
reserved_code,
- code_allocator.GetMemory(),
+ codegen->GetCode(),
reserved_data,
roots,
ArrayRef<const uint8_t>(stack_map),
debug_info,
/* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(),
compilation_kind,
- codegen->GetGraph()->HasShouldDeoptimizeFlag(),
codegen->GetGraph()->GetCHASingleImplementationList())) {
+ CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(stack_map.data()),
+ codegen->GetGraph()->HasShouldDeoptimizeFlag());
code_cache->Free(self, region, reserved_code.data(), reserved_data.data());
return false;
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
if (jit_logger != nullptr) {
- jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method);
+ jit_logger->WriteLog(code, codegen->GetAssembler()->CodeSize(), method);
}
if (kArenaAllocatorCountAllocations) {
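Taken together, the optimizing_compiler.cc hunks remove the external CodeVectorAllocator and have callers read the finished code straight from the code generator. Assuming only the accessors visible in this diff (Compile(), GetCode(), GetAssembler()->CodeSize()), the caller-side flow reduces to roughly:

// Sketch of the post-refactor flow; `codegen` is a CodeGenerator produced by TryCompile().
codegen->Compile();                                      // Emits into storage owned by the codegen.
ArrayRef<const uint8_t> code = codegen->GetCode();       // Final machine code bytes.
size_t code_size = codegen->GetAssembler()->CodeSize();  // Size reported to the JIT code cache.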
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index a1c05e9cad..d2b993280d 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -81,8 +81,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
message_ << ")";
}
- void SpillScratch(int reg ATTRIBUTE_UNUSED) override {}
- void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {}
+ void SpillScratch([[maybe_unused]] int reg) override {}
+ void RestoreScratch([[maybe_unused]] int reg) override {}
std::string GetMessage() const {
return message_.str();
@@ -126,7 +126,7 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap {
return scratch;
}
- void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {}
+ void FreeScratchLocation([[maybe_unused]] Location loc) override {}
void EmitMove(size_t index) override {
MoveOperands* move = moves_[index];
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index d3da3d3ce1..56341f106f 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -62,7 +62,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
}
void VisitReturn(HReturn* ret) override {
- HConstant* value = ret->InputAt(0)->AsConstant();
+ HConstant* value = ret->InputAt(0)->AsConstantOrNull();
if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) {
ReplaceInput(ret, value, 0, true);
}
@@ -95,7 +95,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
}
void BinaryFP(HBinaryOperation* bin) {
- HConstant* rhs = bin->InputAt(1)->AsConstant();
+ HConstant* rhs = bin->InputAt(1)->AsConstantOrNull();
if (rhs != nullptr && DataType::IsFloatingPointType(rhs->GetType())) {
ReplaceInput(bin, rhs, 1, false);
}
@@ -193,7 +193,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
}
void HandleInvoke(HInvoke* invoke) {
- HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirectOrNull();
// If this is an invoke-static/-direct with PC-relative addressing (within boot image
// or using .bss or .data.bimg.rel.ro), we need the PC-relative address base.
@@ -207,7 +207,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
base_added = true;
}
- HInvokeInterface* invoke_interface = invoke->AsInvokeInterface();
+ HInvokeInterface* invoke_interface = invoke->AsInvokeInterfaceOrNull();
if (invoke_interface != nullptr &&
IsPcRelativeMethodLoadKind(invoke_interface->GetHiddenArgumentLoadKind())) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
@@ -219,7 +219,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
// Ensure that we can load FP arguments from the constant area.
HInputsRef inputs = invoke->GetInputs();
for (size_t i = 0; i < inputs.size(); i++) {
- HConstant* input = inputs[i]->AsConstant();
+ HConstant* input = inputs[i]->AsConstantOrNull();
if (input != nullptr && DataType::IsFloatingPointType(input->GetType())) {
ReplaceInput(invoke, input, i, true);
}
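The AsXxx() to AsXxxOrNull() renames in this file and the ones below appear to split the downcast API into a null-returning form and a form reserved for callers that already know the kind; under that assumption, the usage pattern looks roughly like this (Handle() is a placeholder):

// Hedged sketch of the apparent convention after the rename.
HConstant* maybe_constant = instruction->AsConstantOrNull();  // nullptr if not an HConstant.
if (maybe_constant != nullptr) {
  Handle(maybe_constant);  // Placeholder for whatever the pass does with the constant.
}
HConstant* known_constant = other->AsConstant();  // Only when the kind is already established.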
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 91bae5f49b..3a5cceed9a 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -254,7 +254,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
HInstruction* input_two = compare->InputAt(1);
HLoadClass* load_class = input_one->IsLoadClass()
? input_one->AsLoadClass()
- : input_two->AsLoadClass();
+ : input_two->AsLoadClassOrNull();
if (load_class == nullptr) {
return;
}
@@ -335,7 +335,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) {
}
void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* block) {
- HIf* ifInstruction = block->GetLastInstruction()->AsIf();
+ HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull();
if (ifInstruction == nullptr) {
return;
}
@@ -453,7 +453,7 @@ static bool MatchIfInstanceOf(HIf* ifInstruction,
// If that's the case insert an HBoundType instruction to bound the type of `x`
// to `ClassX` in the scope of the dominated blocks.
void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* block) {
- HIf* ifInstruction = block->GetLastInstruction()->AsIf();
+ HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull();
if (ifInstruction == nullptr) {
return;
}
@@ -539,9 +539,14 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction*
DCHECK_EQ(instr->GetType(), DataType::Type::kReference);
ScopedObjectAccess soa(Thread::Current());
- ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
- ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
- type_idx, dex_cache, dex_cache->GetClassLoader());
+ StackHandleScope<2> hs(soa.Self());
+ Handle<mirror::DexCache> dex_cache =
+ hs.NewHandle(FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_));
+ Handle<mirror::ClassLoader> loader = hs.NewHandle(dex_cache->GetClassLoader());
+ ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->ResolveType(
+ type_idx, dex_cache, loader);
+ DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending());
+ soa.Self()->ClearException();  // Clean up any exception left by type resolution.
SetClassAsTypeInfo(instr, klass, is_exact);
}
@@ -704,7 +709,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
}
void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
- HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
+ HBoundType* bound_type = check_cast->GetNext()->AsBoundTypeOrNull();
if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
// The next instruction is not an uninitialized BoundType. This must be
// an RTP pass after SsaBuilder and we do not need to do anything.
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 2b012fcd67..ffd94e56b5 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -468,7 +468,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) {
LoopOptions lo(GetParam());
std::default_random_engine g(
lo.initial_null_state_ != InitialNullState::kTrueRandom ? 42 : std::rand());
- std::uniform_int_distribution<bool> uid(false, true);
+ std::uniform_int_distribution<int> uid(0, 1);
RunVisitListTest([&](std::vector<HInstruction*>& lst, HInstruction* null_input) {
auto pred_null = false;
auto next_null = [&]() {
@@ -482,7 +482,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) {
return pred_null;
case InitialNullState::kRandomSetSeed:
case InitialNullState::kTrueRandom:
- return uid(g);
+ return uid(g) > 0;
}
};
HPhi* nulled_phi = lo.null_insertion_ >= 0 ? lst[lo.null_insertion_]->AsPhi() : nullptr;
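The distribution fix above matters because std::uniform_int_distribution is only specified for the standard integer types; instantiating it with bool is undefined. A minimal standalone version of the corrected pattern:

#include <random>

// Draw a random boolean via an int distribution; <bool> is not a permitted IntType.
bool RandomBool(std::default_random_engine& gen) {
  std::uniform_int_distribution<int> dist(0, 1);
  return dist(gen) > 0;
}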
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 53e11f2c3d..a4b1698b8d 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -531,9 +531,9 @@ void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
HInstruction* previous = user->GetPrevious();
HParallelMove* move = nullptr;
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+ if (previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->GetLifetimePosition() < user->GetLifetimePosition()) {
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(user->GetLifetimePosition());
user->GetBlock()->InsertInstructionBefore(move, user);
@@ -593,7 +593,7 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
} else if (IsInstructionEnd(position)) {
// Move must happen after the instruction.
DCHECK(!at->IsControlFlow());
- move = at->GetNext()->AsParallelMove();
+ move = at->GetNext()->AsParallelMoveOrNull();
// This is a parallel move for connecting siblings in a same block. We need to
// differentiate it with moves for connecting blocks, and input moves.
if (move == nullptr || move->GetLifetimePosition() > position) {
@@ -604,15 +604,15 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
} else {
// Move must happen before the instruction.
HInstruction* previous = at->GetPrevious();
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() != position) {
+ if (previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->GetLifetimePosition() != position) {
// If the previous is a parallel move, then its position must be lower
// than the given `position`: it was added just after the non-parallel
// move instruction that precedes `instruction`.
- DCHECK(previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < position);
+ DCHECK(previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->GetLifetimePosition() < position);
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(position);
at->GetBlock()->InsertInstructionBefore(move, at);
@@ -643,8 +643,9 @@ void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
// This is a parallel move for connecting blocks. We need to differentiate
// it with moves for connecting siblings in a same block, and output moves.
size_t position = last->GetLifetimePosition();
- if (previous == nullptr || !previous->IsParallelMove()
- || previous->AsParallelMove()->GetLifetimePosition() != position) {
+ if (previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->AsParallelMove()->GetLifetimePosition() != position) {
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(position);
block->InsertInstructionBefore(move, last);
@@ -662,7 +663,7 @@ void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
if (source.Equals(destination)) return;
HInstruction* first = block->GetFirstInstruction();
- HParallelMove* move = first->AsParallelMove();
+ HParallelMove* move = first->AsParallelMoveOrNull();
size_t position = block->GetLifetimeStart();
// This is a parallel move for connecting blocks. We need to differentiate
// it with moves for connecting siblings in a same block, and input moves.
@@ -686,7 +687,7 @@ void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
}
size_t position = instruction->GetLifetimePosition() + 1;
- HParallelMove* move = instruction->GetNext()->AsParallelMove();
+ HParallelMove* move = instruction->GetNext()->AsParallelMoveOrNull();
// This is a parallel move for moving the output of an instruction. We need
// to differentiate with input moves, moves for connecting siblings in a
// and moves for connecting blocks.
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index e4c2d74908..f8b057d4a8 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -23,7 +23,6 @@
#include "base/scoped_arena_containers.h"
#include "base/bit_vector-inl.h"
#include "code_generator.h"
-#include "register_allocator_graph_color.h"
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
@@ -45,8 +44,8 @@ std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocato
return std::unique_ptr<RegisterAllocator>(
new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis));
case kRegisterAllocatorGraphColor:
- return std::unique_ptr<RegisterAllocator>(
- new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis));
+ LOG(FATAL) << "Graph coloring register allocator has been removed.";
+ UNREACHABLE();
default:
LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
UNREACHABLE();
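With the graph coloring allocator deleted, linear scan is the only strategy the factory above can still construct. Assuming the Create() parameters shown in this hunk (allocator, codegen, liveness analysis, plus a strategy), explicit selection would now look roughly like:

// Sketch: only linear scan remains constructible after this change; argument names are assumed.
std::unique_ptr<RegisterAllocator> ra = RegisterAllocator::Create(
    &scoped_allocator, codegen, liveness, RegisterAllocator::kRegisterAllocatorLinearScan);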
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
deleted file mode 100644
index a7c891d4e7..0000000000
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ /dev/null
@@ -1,2086 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "register_allocator_graph_color.h"
-
-#include "code_generator.h"
-#include "linear_order.h"
-#include "register_allocation_resolver.h"
-#include "ssa_liveness_analysis.h"
-#include "thread-current-inl.h"
-
-namespace art HIDDEN {
-
-// Highest number of registers that we support for any platform. This can be used for std::bitset,
-// for example, which needs to know its size at compile time.
-static constexpr size_t kMaxNumRegs = 32;
-
-// The maximum number of graph coloring attempts before triggering a DCHECK.
-// This is meant to catch changes to the graph coloring algorithm that undermine its forward
-// progress guarantees. Forward progress for the algorithm means splitting live intervals on
-// every graph coloring attempt so that eventually the interference graph will be sparse enough
-// to color. The main threat to forward progress is trying to split short intervals which cannot be
-// split further; this could cause infinite looping because the interference graph would never
-// change. This is avoided by prioritizing short intervals before long ones, so that long
-// intervals are split when coloring fails.
-static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
-
-// We always want to avoid spilling inside loops.
-static constexpr size_t kLoopSpillWeightMultiplier = 10;
-
-// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
-static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
-
-// We avoid moves in blocks that dominate the exit block, since these blocks will
-// be executed on every path through the method.
-static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
-
-enum class CoalesceKind {
- kAdjacentSibling, // Prevents moves at interval split points.
- kFixedOutputSibling, // Prevents moves from a fixed output location.
- kFixedInput, // Prevents moves into a fixed input location.
- kNonlinearControlFlow, // Prevents moves between blocks.
- kPhi, // Prevents phi resolution moves.
- kFirstInput, // Prevents a single input move.
- kAnyInput, // May lead to better instruction selection / smaller encodings.
-};
-
-std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
- return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
-}
-
-static size_t LoopDepthAt(HBasicBlock* block) {
- HLoopInformation* loop_info = block->GetLoopInformation();
- size_t depth = 0;
- while (loop_info != nullptr) {
- ++depth;
- loop_info = loop_info->GetPreHeader()->GetLoopInformation();
- }
- return depth;
-}
-
-// Return the runtime cost of inserting a move instruction at the specified location.
-static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
- HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
- DCHECK(block != nullptr);
- size_t cost = 1;
- if (block->IsSingleJump()) {
- cost *= kSingleJumpBlockWeightMultiplier;
- }
- if (block->Dominates(block->GetGraph()->GetExitBlock())) {
- cost *= kDominatesExitBlockWeightMultiplier;
- }
- for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
- cost *= kLoopSpillWeightMultiplier;
- }
- return cost;
-}
-
-// In general, we estimate coalesce priority by whether it will definitely avoid a move,
-// and by how likely it is to create an interference graph that's harder to color.
-static size_t ComputeCoalescePriority(CoalesceKind kind,
- size_t position,
- const SsaLivenessAnalysis& liveness) {
- if (kind == CoalesceKind::kAnyInput) {
- // This type of coalescing can affect instruction selection, but not moves, so we
- // give it the lowest priority.
- return 0;
- } else {
- return CostForMoveAt(position, liveness);
- }
-}
-
-enum class CoalesceStage {
- kWorklist, // Currently in the iterative coalescing worklist.
- kActive, // Not in a worklist, but could be considered again during iterative coalescing.
- kInactive, // No longer considered until last-chance coalescing.
- kDefunct, // Either the two nodes interfere, or have already been coalesced.
-};
-
-std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
- return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
-}
-
-// Represents a coalesce opportunity between two nodes.
-struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
- CoalesceOpportunity(InterferenceNode* a,
- InterferenceNode* b,
- CoalesceKind kind,
- size_t position,
- const SsaLivenessAnalysis& liveness)
- : node_a(a),
- node_b(b),
- stage(CoalesceStage::kWorklist),
- priority(ComputeCoalescePriority(kind, position, liveness)) {}
-
- // Compare two coalesce opportunities based on their priority.
- // Return true if lhs has a lower priority than that of rhs.
- static bool CmpPriority(const CoalesceOpportunity* lhs,
- const CoalesceOpportunity* rhs) {
- return lhs->priority < rhs->priority;
- }
-
- InterferenceNode* const node_a;
- InterferenceNode* const node_b;
-
- // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
- // and whether it should still be considered.
- CoalesceStage stage;
-
- // The priority of this coalesce opportunity, based on heuristics.
- const size_t priority;
-};
-
-enum class NodeStage {
- kInitial, // Uninitialized.
- kPrecolored, // Marks fixed nodes.
- kSafepoint, // Marks safepoint nodes.
- kPrunable, // Marks uncolored nodes in the interference graph.
- kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers.
- kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers.
- kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers.
- kPruned // Marks nodes already pruned from the interference graph.
-};
-
-std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
- return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
-}
-
-// Returns the estimated cost of spilling a particular live interval.
-static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
- if (interval->HasRegister()) {
- // Intervals with a fixed register cannot be spilled.
- return std::numeric_limits<float>::min();
- }
-
- size_t length = interval->GetLength();
- if (length == 1) {
- // Tiny intervals should have maximum priority, since they cannot be split any further.
- return std::numeric_limits<float>::max();
- }
-
- size_t use_weight = 0;
- if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
- // Cost for spilling at a register definition point.
- use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
- }
-
- // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
- // [interval->GetStart() + 1, interval->GetEnd() + 1)
- auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
- interval->GetUses().end(),
- interval->GetStart() + 1u,
- interval->GetEnd() + 1u);
- for (const UsePosition& use : matching_use_range) {
- if (use.GetUser() != nullptr && use.RequiresRegister()) {
- // Cost for spilling at a register use point.
- use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
- }
- }
-
- // We divide by the length of the interval because we want to prioritize
- // short intervals; we do not benefit much if we split them further.
- return static_cast<float>(use_weight) / static_cast<float>(length);
-}
-
-// Interference nodes make up the interference graph, which is the primary data structure in
-// graph coloring register allocation. Each node represents a single live interval, and contains
-// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
-// pre-colored nodes never contain outgoing edges (only incoming ones).
-//
-// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
-// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
-//
-// Note that a pair interval is represented by a single node in the interference graph, which
-// essentially requires two colors. One consequence of this is that the degree of a node is not
-// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
-// number of colors with which a node could interfere. We model this by giving edges different
-// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
-// For example, the edge between two single nodes will have weight 1. On the other hand,
-// the edge between a single node and a pair node will have weight 2. This is because the pair
-// node could block up to two colors for the single node, and because the single node could
-// block an entire two-register aligned slot for the pair node.
-// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
-// and thus whether it is safe to prune it from the interference graph early on.
-class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
- public:
- InterferenceNode(LiveInterval* interval,
- const SsaLivenessAnalysis& liveness)
- : stage(NodeStage::kInitial),
- interval_(interval),
- adjacent_nodes_(nullptr),
- coalesce_opportunities_(nullptr),
- out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
- alias_(this),
- spill_weight_(ComputeSpillWeight(interval, liveness)),
- requires_color_(interval->RequiresRegister()),
- needs_spill_slot_(false) {
- DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
- }
-
- void AddInterference(InterferenceNode* other,
- bool guaranteed_not_interfering_yet,
- ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) {
- DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
- DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
- DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
- DCHECK_NE(stage, NodeStage::kPruned);
- DCHECK_NE(other->stage, NodeStage::kPruned);
- if (adjacent_nodes_ == nullptr) {
- ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator());
- storage->emplace_back(adapter);
- adjacent_nodes_ = &storage->back();
- }
- if (guaranteed_not_interfering_yet) {
- DCHECK(!ContainsElement(GetAdjacentNodes(), other));
- adjacent_nodes_->push_back(other);
- out_degree_ += EdgeWeightWith(other);
- } else {
- if (!ContainsElement(GetAdjacentNodes(), other)) {
- adjacent_nodes_->push_back(other);
- out_degree_ += EdgeWeightWith(other);
- }
- }
- }
-
- void RemoveInterference(InterferenceNode* other) {
- DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
- DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
- if (adjacent_nodes_ != nullptr) {
- auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other);
- if (it != adjacent_nodes_->end()) {
- adjacent_nodes_->erase(it);
- out_degree_ -= EdgeWeightWith(other);
- }
- }
- }
-
- bool ContainsInterference(InterferenceNode* other) const {
- DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
- DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
- return ContainsElement(GetAdjacentNodes(), other);
- }
-
- LiveInterval* GetInterval() const {
- return interval_;
- }
-
- ArrayRef<InterferenceNode*> GetAdjacentNodes() const {
- return adjacent_nodes_ != nullptr
- ? ArrayRef<InterferenceNode*>(*adjacent_nodes_)
- : ArrayRef<InterferenceNode*>();
- }
-
- size_t GetOutDegree() const {
- // Pre-colored nodes have infinite degree.
- DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max());
- return out_degree_;
- }
-
- void AddCoalesceOpportunity(CoalesceOpportunity* opportunity,
- ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) {
- if (coalesce_opportunities_ == nullptr) {
- ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator());
- storage->emplace_back(adapter);
- coalesce_opportunities_ = &storage->back();
- }
- coalesce_opportunities_->push_back(opportunity);
- }
-
- void ClearCoalesceOpportunities() {
- coalesce_opportunities_ = nullptr;
- }
-
- bool IsMoveRelated() const {
- for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kWorklist ||
- opportunity->stage == CoalesceStage::kActive) {
- return true;
- }
- }
- return false;
- }
-
- // Return whether this node already has a color.
- // Used to find fixed nodes in the interference graph before coloring.
- bool IsPrecolored() const {
- return interval_->HasRegister();
- }
-
- bool IsPair() const {
- return interval_->HasHighInterval();
- }
-
- void SetAlias(InterferenceNode* rep) {
- DCHECK_NE(rep->stage, NodeStage::kPruned);
- DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
- alias_ = rep;
- }
-
- InterferenceNode* GetAlias() {
- if (alias_ != this) {
- // Recurse in order to flatten tree of alias pointers.
- alias_ = alias_->GetAlias();
- }
- return alias_;
- }
-
- ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const {
- return coalesce_opportunities_ != nullptr
- ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_)
- : ArrayRef<CoalesceOpportunity*>();
- }
-
- float GetSpillWeight() const {
- return spill_weight_;
- }
-
- bool RequiresColor() const {
- return requires_color_;
- }
-
- // We give extra weight to edges adjacent to pair nodes. See the general comment on the
- // interference graph above.
- size_t EdgeWeightWith(const InterferenceNode* other) const {
- return (IsPair() || other->IsPair()) ? 2 : 1;
- }
-
- bool NeedsSpillSlot() const {
- return needs_spill_slot_;
- }
-
- void SetNeedsSpillSlot() {
- needs_spill_slot_ = true;
- }
-
- // The current stage of this node, indicating which worklist it belongs to.
- NodeStage stage;
-
- private:
- // The live interval that this node represents.
- LiveInterval* const interval_;
-
- // All nodes interfering with this one.
- // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
- // set sizes that we encounter. Using a vector leads to much better performance.
- ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration.
-
- // Interference nodes that this node should be coalesced with to reduce moves.
- ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration.
-
- // The maximum number of colors with which this node could interfere. This could be more than
- // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
- // We use "out" degree because incoming edges come from nodes already pruned from the graph,
- // and do not affect the coloring of this node.
- // Pre-colored nodes are treated as having infinite degree.
- size_t out_degree_;
-
- // The node representing this node in the interference graph.
- // Initially set to `this`, and only changed if this node is coalesced into another.
- InterferenceNode* alias_;
-
- // The cost of splitting and spilling this interval to the stack.
- // Nodes with a higher spill weight should be prioritized when assigning registers.
- // This is essentially based on use density and location; short intervals with many uses inside
- // deeply nested loops have a high spill weight.
- const float spill_weight_;
-
- const bool requires_color_;
-
- bool needs_spill_slot_;
-
- DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
-};
-
-// The order in which we color nodes is important. To guarantee forward progress,
-// we prioritize intervals that require registers, and after that we prioritize
-// short intervals. That way, if we fail to color a node, it either won't require a
-// register, or it will be a long interval that can be split in order to make the
-// interference graph sparser.
-// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
-// (This metric is secondary to the forward progress requirements above.)
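-// For example, a node that requires a color always outranks one that does not, regardless of
-// spill weight; only between two nodes with the same RequiresColor() value does spill weight
-// break the tie.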
-// TODO: May also want to consider:
-// - Constants (since they can be rematerialized)
-// - Allocated spill slots
-static bool HasGreaterNodePriority(const InterferenceNode* lhs,
- const InterferenceNode* rhs) {
- // (1) Prioritize the node that requires a color.
- if (lhs->RequiresColor() != rhs->RequiresColor()) {
- return lhs->RequiresColor();
- }
-
- // (2) Prioritize the interval that has a higher spill weight.
- return lhs->GetSpillWeight() > rhs->GetSpillWeight();
-}
-
-// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
-// and provides methods for each phase of the attempt.
-class ColoringIteration {
- public:
- ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
- ScopedArenaAllocator* allocator,
- bool processing_core_regs,
- size_t num_regs)
- : register_allocator_(register_allocator),
- allocator_(allocator),
- processing_core_regs_(processing_core_regs),
- num_regs_(num_regs),
- interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
- coalesce_worklist_(CoalesceOpportunity::CmpPriority,
- allocator->Adapter(kArenaAllocRegisterAllocator)),
- adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {}
-
- // Use the intervals collected from instructions to construct an
- // interference graph mapping intervals to adjacency lists.
- // Also, collect synthesized safepoint nodes, used to keep
- // track of live intervals across safepoints.
- // TODO: Should build safepoints elsewhere.
- void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals,
- const ScopedArenaVector<InterferenceNode*>& physical_nodes);
-
- // Add coalesce opportunities to interference nodes.
- void FindCoalesceOpportunities();
-
- // Prune nodes from the interference graph to be colored later. Build
- // a stack (pruned_nodes) containing these intervals in an order determined
- // by various heuristics.
- void PruneInterferenceGraph();
-
- // Process pruned_nodes_ to color the interference graph, spilling when
- // necessary. Returns true if successful. Else, some intervals have been
- // split, and the interference graph should be rebuilt for another attempt.
- bool ColorInterferenceGraph();
-
- // Return prunable nodes.
- // The register allocator will need to access prunable nodes after coloring
- // in order to tell the code generator which registers have been assigned.
- ArrayRef<InterferenceNode* const> GetPrunableNodes() const {
- return ArrayRef<InterferenceNode* const>(prunable_nodes_);
- }
-
- private:
- // Create a coalesce opportunity between two nodes.
- void CreateCoalesceOpportunity(InterferenceNode* a,
- InterferenceNode* b,
- CoalesceKind kind,
- size_t position);
-
- // Add an edge in the interference graph, if valid.
- // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
- // when possible.
- void AddPotentialInterference(InterferenceNode* from,
- InterferenceNode* to,
- bool guaranteed_not_interfering_yet,
- bool both_directions = true);
-
- // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
- // may be pruned from the interference graph.
- void FreezeMoves(InterferenceNode* node);
-
- // Prune a node from the interference graph, updating worklists if necessary.
- void PruneNode(InterferenceNode* node);
-
- // Add coalesce opportunities associated with this node to the coalesce worklist.
- void EnableCoalesceOpportunities(InterferenceNode* node);
-
- // If needed, move `node` from the freeze worklist to the simplify worklist.
- void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
-
- // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
- bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
-
- // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
- bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
-
- void Coalesce(CoalesceOpportunity* opportunity);
-
- // Merge `from` into `into` in the interference graph.
- void Combine(InterferenceNode* from, InterferenceNode* into);
-
- // A reference to the register allocator instance,
- // needed to split intervals and assign spill slots.
- RegisterAllocatorGraphColor* register_allocator_;
-
- // A scoped arena allocator used for a single graph coloring attempt.
- ScopedArenaAllocator* allocator_;
-
- const bool processing_core_regs_;
-
- const size_t num_regs_;
-
- // A map from live intervals to interference nodes.
- ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
-
- // Uncolored nodes that should be pruned from the interference graph.
- ScopedArenaVector<InterferenceNode*> prunable_nodes_;
-
- // A stack of nodes pruned from the interference graph, waiting to be colored.
- ScopedArenaStdStack<InterferenceNode*> pruned_nodes_;
-
- // A queue containing low degree, non-move-related nodes that can be pruned immediately.
- ScopedArenaDeque<InterferenceNode*> simplify_worklist_;
-
- // A queue containing low degree, move-related nodes.
- ScopedArenaDeque<InterferenceNode*> freeze_worklist_;
-
- // A queue containing high degree nodes.
- // If we have to prune from the spill worklist, we cannot guarantee
- // the pruned node a color, so we order the worklist by priority.
- ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
-
- // A queue containing coalesce opportunities.
- // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
- // inside of loops) are more important than others.
- ScopedArenaPriorityQueue<CoalesceOpportunity*,
- decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
-
- // Storage for links to adjacent nodes for interference nodes.
- // Using std::deque so that elements do not move when adding new ones.
- ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_;
-
- // Storage for links to coalesce opportunities for interference nodes.
- // Using std::deque so that elements do not move when adding new ones.
- ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_;
-
- DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
-};
-
-static bool IsCoreInterval(LiveInterval* interval) {
- return !DataType::IsFloatingPointType(interval->GetType());
-}
-
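-// Number of vreg-sized stack slots reserved for the ArtMethod* reference in the frame:
-// e.g., with 4-byte vregs this is two slots on a 64-bit target and one on a 32-bit target.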
-static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
- return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
-}
-
-RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& liveness,
- bool iterative_move_coalescing)
- : RegisterAllocator(allocator, codegen, liveness),
- iterative_move_coalescing_(iterative_move_coalescing),
- core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- num_int_spill_slots_(0),
- num_double_spill_slots_(0),
- num_float_spill_slots_(0),
- num_long_spill_slots_(0),
- catch_phi_spill_slot_counter_(0),
- reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
- reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
- // Before we ask for blocked registers, set them up in the code generator.
- codegen->SetupBlockedRegisters();
-
- // Initialize physical core register live intervals and blocked registers.
- // This includes globally blocked registers, such as the stack pointer.
- physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32);
- physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
- physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
- core_intervals_.push_back(interval);
- if (codegen_->IsBlockedCoreRegister(i)) {
- interval->AddRange(0, liveness.GetMaxLifetimePosition());
- }
- }
- // Initialize physical floating point register live intervals and blocked registers.
- physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- LiveInterval* interval =
- LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32);
- physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
- physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
- fp_intervals_.push_back(interval);
- if (codegen_->IsBlockedFloatingPointRegister(i)) {
- interval->AddRange(0, liveness.GetMaxLifetimePosition());
- }
- }
-}
-
-RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {}
-
-void RegisterAllocatorGraphColor::AllocateRegisters() {
- // (1) Collect and prepare live intervals.
- ProcessInstructions();
-
- for (bool processing_core_regs : {true, false}) {
- ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs
- ? core_intervals_
- : fp_intervals_;
- size_t num_registers = processing_core_regs
- ? codegen_->GetNumberOfCoreRegisters()
- : codegen_->GetNumberOfFloatingPointRegisters();
-
- size_t attempt = 0;
- while (true) {
- ++attempt;
- DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
- << "Exceeded debug max graph coloring register allocation attempts. "
- << "This could indicate that the register allocator is not making forward progress, "
- << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
- << "should be prioritized over long ones, because they cannot be split further.)";
-
- // Many data structures are cleared between graph coloring attempts, so we reduce
- // total memory usage by using a new scoped arena allocator for each attempt.
- ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack());
- ColoringIteration iteration(this,
- &coloring_attempt_allocator,
- processing_core_regs,
- num_registers);
-
- // (2) Build the interference graph.
- ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
- ? physical_core_nodes_
- : physical_fp_nodes_;
- iteration.BuildInterferenceGraph(intervals, physical_nodes);
-
- // (3) Add coalesce opportunities.
- // If we have tried coloring the graph a suspiciously high number of times, give
- // up on move coalescing, just in case the coalescing heuristics are not conservative.
- // (This situation will be caught if DCHECKs are turned on.)
- if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
- iteration.FindCoalesceOpportunities();
- }
-
- // (4) Prune all uncolored nodes from interference graph.
- iteration.PruneInterferenceGraph();
-
- // (5) Color pruned nodes based on interferences.
- bool successful = iteration.ColorInterferenceGraph();
-
- // We manually clear coalesce opportunities for physical nodes,
- // since they persist across coloring attempts.
- for (InterferenceNode* node : physical_core_nodes_) {
- node->ClearCoalesceOpportunities();
- }
- for (InterferenceNode* node : physical_fp_nodes_) {
- node->ClearCoalesceOpportunities();
- }
-
- if (successful) {
- // Assign spill slots.
- AllocateSpillSlots(iteration.GetPrunableNodes());
-
- // Tell the code generator which registers were allocated.
- // We only look at prunable_nodes because we already told the code generator about
- // fixed intervals while processing instructions. We also ignore the fixed intervals
- // placed at the top of catch blocks.
- for (InterferenceNode* node : iteration.GetPrunableNodes()) {
- LiveInterval* interval = node->GetInterval();
- if (interval->HasRegister()) {
- Location low_reg = processing_core_regs
- ? Location::RegisterLocation(interval->GetRegister())
- : Location::FpuRegisterLocation(interval->GetRegister());
- codegen_->AddAllocatedRegister(low_reg);
- if (interval->HasHighInterval()) {
- LiveInterval* high = interval->GetHighInterval();
- DCHECK(high->HasRegister());
- Location high_reg = processing_core_regs
- ? Location::RegisterLocation(high->GetRegister())
- : Location::FpuRegisterLocation(high->GetRegister());
- codegen_->AddAllocatedRegister(high_reg);
- }
- } else {
- DCHECK_IMPLIES(interval->HasHighInterval(),
- !interval->GetHighInterval()->HasRegister());
- }
- }
-
- break;
- }
- } // while unsuccessful
- } // for processing_core_regs
-
- // (6) Resolve locations and deconstruct SSA form.
- RegisterAllocationResolver(codegen_, liveness_)
- .Resolve(ArrayRef<HInstruction* const>(safepoints_),
- reserved_art_method_slots_ + reserved_out_slots_,
- num_int_spill_slots_,
- num_long_spill_slots_,
- num_float_spill_slots_,
- num_double_spill_slots_,
- catch_phi_spill_slot_counter_,
- ArrayRef<LiveInterval* const>(temp_intervals_));
-
- if (kIsDebugBuild) {
- Validate(/*log_fatal_on_failure*/ true);
- }
-}
-
-bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
- for (bool processing_core_regs : {true, false}) {
- ScopedArenaAllocator allocator(allocator_->GetArenaStack());
- ScopedArenaVector<LiveInterval*> intervals(
- allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
- for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- LiveInterval* interval = instruction->GetLiveInterval();
- if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
- intervals.push_back(instruction->GetLiveInterval());
- }
- }
-
- ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
- ? physical_core_nodes_
- : physical_fp_nodes_;
- for (InterferenceNode* fixed : physical_nodes) {
- LiveInterval* interval = fixed->GetInterval();
- if (interval->GetFirstRange() != nullptr) {
- // Ideally we would check fixed ranges as well, but currently there are times when
- // two fixed intervals for the same register will overlap. For example, a fixed input
- // and a fixed output may sometimes share the same register, in which case there will be two
- // fixed intervals for the same register.
- }
- }
-
- for (LiveInterval* temp : temp_intervals_) {
- if (IsCoreInterval(temp) == processing_core_regs) {
- intervals.push_back(temp);
- }
- }
-
- size_t spill_slots = num_int_spill_slots_
- + num_long_spill_slots_
- + num_float_spill_slots_
- + num_double_spill_slots_
- + catch_phi_spill_slot_counter_;
- bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
- spill_slots,
- reserved_art_method_slots_ + reserved_out_slots_,
- *codegen_,
- processing_core_regs,
- log_fatal_on_failure);
- if (!ok) {
- return false;
- }
- } // for processing_core_regs
-
- return true;
-}
-
-void RegisterAllocatorGraphColor::ProcessInstructions() {
- for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
- // Note that we currently depend on this ordering, since some helper
- // code is designed for linear scan register allocation.
- for (HBackwardInstructionIterator instr_it(block->GetInstructions());
- !instr_it.Done();
- instr_it.Advance()) {
- ProcessInstruction(instr_it.Current());
- }
-
- for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- ProcessInstruction(phi_it.Current());
- }
-
- if (block->IsCatchBlock()
- || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
- // By blocking all registers at the top of each catch block or irreducible loop, we force
- // intervals belonging to the live-in set of the catch/header block to be spilled.
- // TODO(ngeoffray): Phis in this block could be allocated in register.
- size_t position = block->GetLifetimeStart();
- BlockRegisters(position, position + 1);
- }
- }
-}
-
-bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) {
- // TODO: We do this here because we do not want the suspend check to artificially
- // create live registers. We should find another place, but this is currently the
- // simplest.
- DCHECK_EQ(locations->GetTempCount(), 0u);
- instruction->GetBlock()->RemoveInstruction(instruction);
- return true;
- }
- return false;
-}
-
-void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- if (locations == nullptr) {
- return;
- }
- if (TryRemoveSuspendCheckEntry(instruction)) {
- return;
- }
-
- CheckForTempLiveIntervals(instruction);
- CheckForSafepoint(instruction);
- if (locations->WillCall()) {
- // If a call will happen, create fixed intervals for caller-save registers.
- // TODO: Note that it may be beneficial to later split intervals at this point,
- // so that we allow last-minute moves from a caller-save register
- // to a callee-save register.
- BlockRegisters(instruction->GetLifetimePosition(),
- instruction->GetLifetimePosition() + 1,
- /*caller_save_only*/ true);
- }
- CheckForFixedInputs(instruction);
-
- LiveInterval* interval = instruction->GetLiveInterval();
- if (interval == nullptr) {
- // Instructions lacking a valid output location do not have a live interval.
- DCHECK(!locations->Out().IsValid());
- return;
- }
-
- // Low intervals act as representatives for their corresponding high interval.
- DCHECK(!interval->IsHighInterval());
- if (codegen_->NeedsTwoRegisters(interval->GetType())) {
- interval->AddHighInterval();
- }
- AddSafepointsFor(instruction);
- CheckForFixedOutput(instruction);
- AllocateSpillSlotForCatchPhi(instruction);
-
- ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
- ? core_intervals_
- : fp_intervals_;
- if (interval->HasSpillSlot() || instruction->IsConstant()) {
- // Note that if an interval already has a spill slot, then its value currently resides
- // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
- // register use. This is also true for constants, which can be materialized at any point.
- size_t first_register_use = interval->FirstRegisterUse();
- if (first_register_use != kNoLifetime) {
- LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
- intervals.push_back(split);
- } else {
- // We won't allocate a register for this value.
- }
- } else {
- intervals.push_back(interval);
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
- // We simply block physical registers where necessary.
- // TODO: Ideally we would coalesce the physical register with the register
- // allocated to the input value, but this can be tricky if, e.g., there
- // could be multiple physical register uses of the same value at the
- // same instruction. Furthermore, there's currently no distinction between
- // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
- // may not be clobbered).
- LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
- for (size_t i = 0; i < locations->GetInputCount(); ++i) {
- Location input = locations->InAt(i);
- if (input.IsRegister() || input.IsFpuRegister()) {
- BlockRegister(input, position, position + 1);
- codegen_->AddAllocatedRegister(input);
- } else if (input.IsPair()) {
- BlockRegister(input.ToLow(), position, position + 1);
- BlockRegister(input.ToHigh(), position, position + 1);
- codegen_->AddAllocatedRegister(input.ToLow());
- codegen_->AddAllocatedRegister(input.ToHigh());
- }
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
- // If an instruction has a fixed output location, we give the live interval a register and then
- // proactively split it just after the definition point to avoid creating too many interferences
- // with a fixed node.
- LiveInterval* interval = instruction->GetLiveInterval();
- Location out = interval->GetDefinedBy()->GetLocations()->Out();
- size_t position = instruction->GetLifetimePosition();
- DCHECK_GE(interval->GetEnd() - position, 2u);
-
- if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
- out = instruction->GetLocations()->InAt(0);
- }
-
- if (out.IsRegister() || out.IsFpuRegister()) {
- interval->SetRegister(out.reg());
- codegen_->AddAllocatedRegister(out);
- Split(interval, position + 1);
- } else if (out.IsPair()) {
- interval->SetRegister(out.low());
- interval->GetHighInterval()->SetRegister(out.high());
- codegen_->AddAllocatedRegister(out.ToLow());
- codegen_->AddAllocatedRegister(out.ToHigh());
- Split(interval, position + 1);
- } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
- interval->SetSpillSlot(out.GetStackIndex());
- } else {
- DCHECK(out.IsUnallocated() || out.IsConstant());
- }
-}
-
-void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
- LiveInterval* interval = instruction->GetLiveInterval();
- for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
- HInstruction* safepoint = safepoints_[safepoint_index - 1u];
- size_t safepoint_position = safepoint->GetLifetimePosition();
-
- // Check that safepoints_ are ordered by decreasing lifetime position.
- DCHECK(safepoint_index == safepoints_.size() ||
- safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
- if (safepoint_position == interval->GetStart()) {
- // The safepoint is for this instruction, so the location of the instruction
- // does not need to be saved.
- DCHECK_EQ(safepoint_index, safepoints_.size());
- DCHECK_EQ(safepoint, instruction);
- continue;
- } else if (interval->IsDeadAt(safepoint_position)) {
- break;
- } else if (!interval->Covers(safepoint_position)) {
- // Hole in the interval.
- continue;
- }
- interval->AddSafepoint(safepoint);
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
- for (size_t i = 0; i < locations->GetTempCount(); ++i) {
- Location temp = locations->GetTemp(i);
- if (temp.IsRegister() || temp.IsFpuRegister()) {
- BlockRegister(temp, position, position + 1);
- codegen_->AddAllocatedRegister(temp);
- } else {
- DCHECK(temp.IsUnallocated());
- switch (temp.GetPolicy()) {
- case Location::kRequiresRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32);
- interval->AddTempUse(instruction, i);
- core_intervals_.push_back(interval);
- temp_intervals_.push_back(interval);
- break;
- }
-
- case Location::kRequiresFpuRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64);
- interval->AddTempUse(instruction, i);
- fp_intervals_.push_back(interval);
- temp_intervals_.push_back(interval);
- if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
- interval->AddHighInterval(/*is_temp*/ true);
- temp_intervals_.push_back(interval->GetHighInterval());
- }
- break;
- }
-
- default:
- LOG(FATAL) << "Unexpected policy for temporary location "
- << temp.GetPolicy();
- }
- }
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
-
- if (locations->NeedsSafepoint()) {
- safepoints_.push_back(instruction);
- }
-}
-
-LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
- if (interval->GetStart() < position && position < interval->GetEnd()) {
- return Split(interval, position);
- } else {
- return interval;
- }
-}
-
-void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
- DCHECK(!interval->IsHighInterval());
-
- // Split just after a register definition.
- if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
- interval = TrySplit(interval, interval->GetStart() + 1);
- }
-
- // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
- // [interval->GetStart(), interval->GetEnd() + 1)
- auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
- interval->GetUses().end(),
- interval->GetStart(),
- interval->GetEnd() + 1u);
- // Split around register uses.
- for (const UsePosition& use : matching_use_range) {
- if (use.RequiresRegister()) {
- size_t position = use.GetPosition();
- interval = TrySplit(interval, position - 1);
- if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
- // If we are at the very end of a basic block, we cannot split right
- // at the use. Split just after instead.
- interval = TrySplit(interval, position + 1);
- } else {
- interval = TrySplit(interval, position);
- }
- }
- }
-}
-
-void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
- if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
- HPhi* phi = instruction->AsPhi();
- LiveInterval* interval = phi->GetLiveInterval();
-
- HInstruction* previous_phi = phi->GetPrevious();
- DCHECK(previous_phi == nullptr ||
- previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
- << "Phis expected to be sorted by vreg number, "
- << "so that equivalent phis are adjacent.";
-
- if (phi->IsVRegEquivalentOf(previous_phi)) {
- // Assign the same spill slot.
- DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
- interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
- } else {
- interval->SetSpillSlot(catch_phi_spill_slot_counter_);
- catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
- }
- }
-}
-
-void RegisterAllocatorGraphColor::BlockRegister(Location location,
- size_t start,
- size_t end) {
- DCHECK(location.IsRegister() || location.IsFpuRegister());
- int reg = location.reg();
- LiveInterval* interval = location.IsRegister()
- ? physical_core_nodes_[reg]->GetInterval()
- : physical_fp_nodes_[reg]->GetInterval();
- DCHECK(interval->GetRegister() == reg);
- bool blocked_by_codegen = location.IsRegister()
- ? codegen_->IsBlockedCoreRegister(reg)
- : codegen_->IsBlockedFloatingPointRegister(reg);
- if (blocked_by_codegen) {
- // We've already blocked this register for the entire method. (And adding a
- // range inside another range violates the preconditions of AddRange).
- } else {
- interval->AddRange(start, end);
- }
-}
-
-void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
- BlockRegister(Location::RegisterLocation(i), start, end);
- }
- }
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
- BlockRegister(Location::FpuRegisterLocation(i), start, end);
- }
- }
-}
-
-void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
- InterferenceNode* to,
- bool guaranteed_not_interfering_yet,
- bool both_directions) {
- if (from->IsPrecolored()) {
- // We save space by ignoring outgoing edges from fixed nodes.
- } else if (to->IsPrecolored()) {
- // It is important that only a single node represents a given fixed register in the
- // interference graph. We retrieve that node here.
- const ScopedArenaVector<InterferenceNode*>& physical_nodes =
- to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_
- : register_allocator_->physical_core_nodes_;
- InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
- from->AddInterference(
- physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
- DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
- DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
-
- // If a node interferes with a fixed pair node, the weight of the edge may
- // be inaccurate after using the alias of the pair node, because the alias of the pair node
- // is a singular node.
- // We could make special pair fixed nodes, but that ends up being too conservative because
- // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
- // three rather than two.
- // Instead, we explicitly add an interference with the high node of the fixed pair node.
- // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
- // can be unaligned on x86 complicates things.
- if (to->IsPair()) {
- InterferenceNode* high_node =
- physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
- DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
- high_node->GetInterval()->GetRegister());
- from->AddInterference(
- high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
- }
- } else {
- // Standard interference between two uncolored nodes.
- from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_);
- }
-
- if (both_directions) {
- AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
- }
-}
-
-// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
-// is allowed to have the same register as the input interval.
-// TODO: Ideally we should just produce correct intervals in liveness analysis.
-// We would need to refactor the current live interval layout to do so, which is
-// no small task.
-static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
- LiveInterval* output_interval = out_node->GetInterval();
- HInstruction* defined_by = output_interval->GetDefinedBy();
- if (defined_by == nullptr) {
- // This must not be a definition point.
- return false;
- }
-
- LocationSummary* locations = defined_by->GetLocations();
- if (locations->OutputCanOverlapWithInputs()) {
- // This instruction does not allow the output to reuse a register from an input.
- return false;
- }
-
- LiveInterval* input_interval = in_node->GetInterval();
- LiveInterval* next_sibling = input_interval->GetNextSibling();
- size_t def_position = defined_by->GetLifetimePosition();
- size_t use_position = def_position + 1;
- if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
- // The next sibling starts at the use position, so reusing the input register in the output
- // would clobber the input before it's moved into the sibling interval location.
- return false;
- }
-
- if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
- // The input interval is live after the use position.
- return false;
- }
-
- HInputsRef inputs = defined_by->GetInputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
- DCHECK(input_interval->SameRegisterKind(*output_interval));
- return true;
- }
- }
-
- // The input interval was not an input for this instruction.
- return false;
-}
-
-void ColoringIteration::BuildInterferenceGraph(
- const ScopedArenaVector<LiveInterval*>& intervals,
- const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
- DCHECK(interval_node_map_.empty() && prunable_nodes_.empty());
- // Build the interference graph efficiently by ordering range endpoints
- // by position and doing a linear sweep to find interferences. (That is, we
- // jump from endpoint to endpoint, maintaining a set of intervals live at each
- // point. If two nodes are ever in the live set at the same time, then they
- // interfere with each other.)
- //
- // We order by both position and (secondarily) by whether the endpoint
- // begins or ends a range; we want to process range endings before range
- // beginnings at the same position because they should not conflict.
- //
- // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
- // Tuple contents: (position, is_range_beginning, node).
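- // For example, an ending endpoint (10, false, a) sorts before a beginning endpoint
- // (10, true, b), so `a` leaves the live set before `b` enters it and no interference is
- // recorded between the two.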
- ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
- allocator_->Adapter(kArenaAllocRegisterAllocator));
-
- // We reserve plenty of space to avoid excessive copying.
- range_endpoints.reserve(4 * prunable_nodes_.size());
-
- for (LiveInterval* parent : intervals) {
- for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
- LiveRange* range = sibling->GetFirstRange();
- if (range != nullptr) {
- InterferenceNode* node =
- new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
- interval_node_map_.insert(std::make_pair(sibling, node));
-
- if (sibling->HasRegister()) {
- // Fixed nodes should alias the canonical node for the corresponding register.
- node->stage = NodeStage::kPrecolored;
- InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
- node->SetAlias(physical_node);
- DCHECK_EQ(node->GetInterval()->GetRegister(),
- physical_node->GetInterval()->GetRegister());
- } else {
- node->stage = NodeStage::kPrunable;
- prunable_nodes_.push_back(node);
- }
-
- while (range != nullptr) {
- range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
- range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
- range = range->GetNext();
- }
- }
- }
- }
-
- // Sort the endpoints.
- // We explicitly ignore the third entry of each tuple (the node pointer) in order
- // to maintain determinism.
- std::sort(range_endpoints.begin(), range_endpoints.end(),
- [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
- const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
- return std::tie(std::get<0>(lhs), std::get<1>(lhs))
- < std::tie(std::get<0>(rhs), std::get<1>(rhs));
- });
-
- // Nodes live at the current position in the linear sweep.
- ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator));
-
- // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
- // live set. When we encounter the end of a range, we remove the corresponding node
- // from the live set. Nodes interfere if they are in the live set at the same time.
- for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
- bool is_range_beginning;
- InterferenceNode* node;
- size_t position;
- // Extract information from the tuple, including the node this tuple represents.
- std::tie(position, is_range_beginning, node) = *it;
-
- if (is_range_beginning) {
- bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
- for (InterferenceNode* conflicting : live) {
- DCHECK_NE(node, conflicting);
- if (CheckInputOutputCanOverlap(conflicting, node)) {
- // We do not add an interference, because the instruction represented by `node` allows
- // its output to share a register with an input, represented here by `conflicting`.
- } else {
- AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
- }
- }
- DCHECK(std::find(live.begin(), live.end(), node) == live.end());
- live.push_back(node);
- } else {
- // End of range.
- auto live_it = std::find(live.begin(), live.end(), node);
- DCHECK(live_it != live.end());
- live.erase(live_it);
- }
- }
- DCHECK(live.empty());
-}
-
-void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
- InterferenceNode* b,
- CoalesceKind kind,
- size_t position) {
- DCHECK_EQ(a->IsPair(), b->IsPair())
- << "Nodes of different memory widths should never be coalesced";
- CoalesceOpportunity* opportunity =
- new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
- a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- coalesce_worklist_.push(opportunity);
-}
-
-// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
-// corresponding to an interval. Note that not all intervals are in this map, notably the parents
- // of constants and stack arguments. (However, these intervals should not be involved in coalesce
-// opportunities anyway, because they're not going to be in registers.)
-void ColoringIteration::FindCoalesceOpportunities() {
- DCHECK(coalesce_worklist_.empty());
-
- for (InterferenceNode* node : prunable_nodes_) {
- LiveInterval* interval = node->GetInterval();
-
- // Coalesce siblings.
- LiveInterval* next_sibling = interval->GetNextSibling();
- if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
- auto it = interval_node_map_.find(next_sibling);
- if (it != interval_node_map_.end()) {
- InterferenceNode* sibling_node = it->second;
- CreateCoalesceOpportunity(node,
- sibling_node,
- CoalesceKind::kAdjacentSibling,
- interval->GetEnd());
- }
- }
-
- // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
- LiveInterval* parent = interval->GetParent();
- if (parent->HasRegister()
- && parent->GetNextSibling() == interval
- && parent->GetEnd() == interval->GetStart()) {
- auto it = interval_node_map_.find(parent);
- if (it != interval_node_map_.end()) {
- InterferenceNode* parent_node = it->second;
- CreateCoalesceOpportunity(node,
- parent_node,
- CoalesceKind::kFixedOutputSibling,
- parent->GetEnd());
- }
- }
-
- // Try to prevent moves across blocks.
- // Note that this does not lead to many successful coalesce attempts, so it could be removed
- // if found to add to compile time.
- const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
- if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
- // If the start of this interval is at a block boundary, we look at the
- // location of the interval in blocks preceding the block this interval
- // starts at. This can avoid a move between the two blocks.
- HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
- for (HBasicBlock* predecessor : block->GetPredecessors()) {
- size_t position = predecessor->GetLifetimeEnd() - 1;
- LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
- if (existing != nullptr) {
- auto it = interval_node_map_.find(existing);
- if (it != interval_node_map_.end()) {
- InterferenceNode* existing_node = it->second;
- CreateCoalesceOpportunity(node,
- existing_node,
- CoalesceKind::kNonlinearControlFlow,
- position);
- }
- }
- }
- }
-
- // Coalesce phi inputs with the corresponding output.
- HInstruction* defined_by = interval->GetDefinedBy();
- if (defined_by != nullptr && defined_by->IsPhi()) {
- ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors());
- HInputsRef inputs = defined_by->GetInputs();
-
- for (size_t i = 0, e = inputs.size(); i < e; ++i) {
- // We want the sibling at the end of the appropriate predecessor block.
- size_t position = predecessors[i]->GetLifetimeEnd() - 1;
- LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
-
- auto it = interval_node_map_.find(input_interval);
- if (it != interval_node_map_.end()) {
- InterferenceNode* input_node = it->second;
- CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
- }
- }
- }
-
- // Coalesce output with first input when policy is kSameAsFirstInput.
- if (defined_by != nullptr) {
- Location out = defined_by->GetLocations()->Out();
- if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
- LiveInterval* input_interval
- = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
- // TODO: Could we consider lifetime holes here?
- if (input_interval->GetEnd() == interval->GetStart()) {
- auto it = interval_node_map_.find(input_interval);
- if (it != interval_node_map_.end()) {
- InterferenceNode* input_node = it->second;
- CreateCoalesceOpportunity(node,
- input_node,
- CoalesceKind::kFirstInput,
- interval->GetStart());
- }
- }
- }
- }
-
- // An interval that starts an instruction (that is, it is not split), may
- // re-use the registers used by the inputs of that instruction, based on the
- // location summary.
- if (defined_by != nullptr) {
- DCHECK(!interval->IsSplit());
- LocationSummary* locations = defined_by->GetLocations();
- if (!locations->OutputCanOverlapWithInputs()) {
- HInputsRef inputs = defined_by->GetInputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- size_t def_point = defined_by->GetLifetimePosition();
- // TODO: Getting the sibling at the def_point might not be quite what we want
- // for fixed inputs, since the use will be *at* the def_point rather than after.
- LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
- if (input_interval != nullptr &&
- input_interval->HasHighInterval() == interval->HasHighInterval()) {
- auto it = interval_node_map_.find(input_interval);
- if (it != interval_node_map_.end()) {
- InterferenceNode* input_node = it->second;
- CreateCoalesceOpportunity(node,
- input_node,
- CoalesceKind::kAnyInput,
- interval->GetStart());
- }
- }
- }
- }
- }
-
- // Try to prevent moves into fixed input locations.
- // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
- // [interval->GetStart() + 1, interval->GetEnd() + 1)
- auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
- interval->GetUses().end(),
- interval->GetStart() + 1u,
- interval->GetEnd() + 1u);
- for (const UsePosition& use : matching_use_range) {
- HInstruction* user = use.GetUser();
- if (user == nullptr) {
- // User may be null for certain intervals, such as temp intervals.
- continue;
- }
- LocationSummary* locations = user->GetLocations();
- Location input = locations->InAt(use.GetInputIndex());
- if (input.IsRegister() || input.IsFpuRegister()) {
- // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
- // is currently not supported.
- InterferenceNode* fixed_node = input.IsRegister()
- ? register_allocator_->physical_core_nodes_[input.reg()]
- : register_allocator_->physical_fp_nodes_[input.reg()];
- CreateCoalesceOpportunity(node,
- fixed_node,
- CoalesceKind::kFixedInput,
- user->GetLifetimePosition());
- }
- }
- } // for node in prunable_nodes
-}
-
-static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
- return node->GetOutDegree() < num_regs;
-}
-
-static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
- return !IsLowDegreeNode(node, num_regs);
-}
-
-void ColoringIteration::PruneInterferenceGraph() {
- DCHECK(pruned_nodes_.empty()
- && simplify_worklist_.empty()
- && freeze_worklist_.empty()
- && spill_worklist_.empty());
- // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
- // and all others as high degree nodes. The distinction is important: low degree nodes are
- // guaranteed a color, while high degree nodes are not.
-
- // Build worklists. Note that the coalesce worklist has already been
- // filled by FindCoalesceOpportunities().
- for (InterferenceNode* node : prunable_nodes_) {
- DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
- if (IsLowDegreeNode(node, num_regs_)) {
- if (node->GetCoalesceOpportunities().empty()) {
- // Simplify Worklist.
- node->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(node);
- } else {
- // Freeze Worklist.
- node->stage = NodeStage::kFreezeWorklist;
- freeze_worklist_.push_back(node);
- }
- } else {
- // Spill worklist.
- node->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(node);
- }
- }
-
- // Prune graph.
- // Note that we do not remove a node from its current worklist if it moves to another, so it may
- // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
- while (true) {
- if (!simplify_worklist_.empty()) {
- // Prune low-degree nodes.
- // TODO: pop_back() should work as well, but it didn't; we get a
- // failed check while pruning. We should look into this.
- InterferenceNode* node = simplify_worklist_.front();
- simplify_worklist_.pop_front();
- DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
- DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
- DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
- PruneNode(node);
- } else if (!coalesce_worklist_.empty()) {
- // Coalesce.
- CoalesceOpportunity* opportunity = coalesce_worklist_.top();
- coalesce_worklist_.pop();
- if (opportunity->stage == CoalesceStage::kWorklist) {
- Coalesce(opportunity);
- }
- } else if (!freeze_worklist_.empty()) {
- // Freeze moves and prune a low-degree move-related node.
- InterferenceNode* node = freeze_worklist_.front();
- freeze_worklist_.pop_front();
- if (node->stage == NodeStage::kFreezeWorklist) {
- DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
- DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
- FreezeMoves(node);
- PruneNode(node);
- }
- } else if (!spill_worklist_.empty()) {
- // We spill the lowest-priority node, because pruning a node earlier
- // gives it a higher chance of being spilled.
- InterferenceNode* node = spill_worklist_.top();
- spill_worklist_.pop();
- if (node->stage == NodeStage::kSpillWorklist) {
- DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
- FreezeMoves(node);
- PruneNode(node);
- }
- } else {
- // Pruning complete.
- break;
- }
- }
- DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
-}
-
-void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
- for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kActive) {
- opportunity->stage = CoalesceStage::kWorklist;
- coalesce_worklist_.push(opportunity);
- }
- }
-}
-
-void ColoringIteration::PruneNode(InterferenceNode* node) {
- DCHECK_NE(node->stage, NodeStage::kPruned);
- DCHECK(!node->IsPrecolored());
- node->stage = NodeStage::kPruned;
- pruned_nodes_.push(node);
-
- for (InterferenceNode* adj : node->GetAdjacentNodes()) {
- DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
-
- if (adj->IsPrecolored()) {
- // No effect on pre-colored nodes; they're never pruned.
- } else {
- // Remove the interference.
- bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
- DCHECK(adj->ContainsInterference(node))
- << "Missing reflexive interference from non-fixed node";
- adj->RemoveInterference(node);
-
- // Handle transitions from high degree to low degree.
- if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
- EnableCoalesceOpportunities(adj);
- for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
- EnableCoalesceOpportunities(adj_adj);
- }
-
- DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
- if (adj->IsMoveRelated()) {
- adj->stage = NodeStage::kFreezeWorklist;
- freeze_worklist_.push_back(adj);
- } else {
- adj->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(adj);
- }
- }
- }
- }
-}
-
-void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
- if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
- DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
- node->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(node);
- }
-}
-
-void ColoringIteration::FreezeMoves(InterferenceNode* node) {
- for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kDefunct) {
- // Constrained moves should remain constrained, since they will not be considered
- // during last-chance coalescing.
- } else {
- opportunity->stage = CoalesceStage::kInactive;
- }
- InterferenceNode* other = opportunity->node_a->GetAlias() == node
- ? opportunity->node_b->GetAlias()
- : opportunity->node_a->GetAlias();
- if (other != node && other->stage == NodeStage::kFreezeWorklist) {
- DCHECK(IsLowDegreeNode(node, num_regs_));
- CheckTransitionFromFreezeWorklist(other);
- }
- }
-}
-
-bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
- InterferenceNode* into) {
- if (!into->IsPrecolored()) {
- // The uncolored heuristic will cover this case.
- return false;
- }
- if (from->IsPair() || into->IsPair()) {
- // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
- // are currently represented as two single fixed nodes in the graph, and `into` is
- // only one of them. (We may lose the implicit connections to the second one in a merge.)
- return false;
- }
-
- // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
- // Reasons an adjacent node `adj` can be "ok":
- // (1) If `adj` is low degree, interference with `into` will not affect its existing
- // colorable guarantee. (Notice that coalescing cannot increase its degree.)
- // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
- // (3) If there's already an interference with `into`, coalescing will not add interferences.
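- // (This is essentially George's conservative coalescing test, applied to the pre-colored
- // case: any troublesome neighbor of `from` must already constrain `into`.)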
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
- // Ok.
- } else {
- return false;
- }
- }
- return true;
-}
-
-bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
- InterferenceNode* into) {
- if (into->IsPrecolored()) {
- // The pre-colored heuristic will handle this case.
- return false;
- }
-
- // Arbitrary cap to improve compile time. Tests show that this has negligible effect
- // on generated code.
- if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
- return false;
- }
-
- // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
- // of high degree. (Low degree neighbors can be ignored, because they will eventually be
- // pruned from the interference graph in the simplify stage.)
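- // For example, with num_regs_ = 4 the test passes while the weighted count of high degree
- // neighbors is at most 3, and fails once it reaches 4.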
- size_t high_degree_interferences = 0;
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- if (IsHighDegreeNode(adj, num_regs_)) {
- high_degree_interferences += from->EdgeWeightWith(adj);
- }
- }
- for (InterferenceNode* adj : into->GetAdjacentNodes()) {
- if (IsHighDegreeNode(adj, num_regs_)) {
- if (from->ContainsInterference(adj)) {
- // We've already counted this adjacent node.
- // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
- // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
- // but remains conservative.)
- if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
- high_degree_interferences -= from->EdgeWeightWith(adj);
- }
- } else {
- high_degree_interferences += into->EdgeWeightWith(adj);
- }
- }
- }
-
- return high_degree_interferences < num_regs_;
-}
-
-void ColoringIteration::Combine(InterferenceNode* from,
- InterferenceNode* into) {
- from->SetAlias(into);
-
- // Add interferences.
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
- AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
- if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
- // This is a (temporary) transition to a high degree node. Its degree will decrease again
- // when we prune `from`, but it's best to be consistent about the current worklist.
- adj->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(adj);
- }
- }
-
- // Add coalesce opportunities.
- for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
- if (opportunity->stage != CoalesceStage::kDefunct) {
- into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- }
- }
- EnableCoalesceOpportunities(from);
-
- // Prune and update worklists.
- PruneNode(from);
- if (IsLowDegreeNode(into, num_regs_)) {
- // Coalesce(...) takes care of checking for a transition to the simplify worklist.
- DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
- } else if (into->stage == NodeStage::kFreezeWorklist) {
- // This is a transition to a high degree node.
- into->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(into);
- } else {
- DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
- }
-}
-
-void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
- InterferenceNode* from = opportunity->node_a->GetAlias();
- InterferenceNode* into = opportunity->node_b->GetAlias();
- DCHECK_NE(from->stage, NodeStage::kPruned);
- DCHECK_NE(into->stage, NodeStage::kPruned);
-
- if (from->IsPrecolored()) {
- // If we have one pre-colored node, make sure it's the `into` node.
- std::swap(from, into);
- }
-
- if (from == into) {
- // These nodes have already been coalesced.
- opportunity->stage = CoalesceStage::kDefunct;
- CheckTransitionFromFreezeWorklist(from);
- } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
- // These nodes interfere.
- opportunity->stage = CoalesceStage::kDefunct;
- CheckTransitionFromFreezeWorklist(from);
- CheckTransitionFromFreezeWorklist(into);
- } else if (PrecoloredHeuristic(from, into)
- || UncoloredHeuristic(from, into)) {
- // We can coalesce these nodes.
- opportunity->stage = CoalesceStage::kDefunct;
- Combine(from, into);
- CheckTransitionFromFreezeWorklist(into);
- } else {
- // We cannot coalesce, but we may be able to later.
- opportunity->stage = CoalesceStage::kActive;
- }
-}
-
-// Build a mask with a bit set for each register assigned to some
-// interval in `intervals`.
-template <typename Container>
-static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) {
- std::bitset<kMaxNumRegs> conflict_mask;
- for (InterferenceNode* adjacent : intervals) {
- LiveInterval* conflicting = adjacent->GetInterval();
- if (conflicting->HasRegister()) {
- conflict_mask.set(conflicting->GetRegister());
- if (conflicting->HasHighInterval()) {
- DCHECK(conflicting->GetHighInterval()->HasRegister());
- conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
- }
- } else {
- DCHECK(!conflicting->HasHighInterval()
- || !conflicting->GetHighInterval()->HasRegister());
- }
- }
- return conflict_mask;
-}
-
-bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
- return processing_core_regs
- ? !codegen_->IsCoreCalleeSaveRegister(reg)
- : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-static bool RegisterIsAligned(size_t reg) {
- return reg % 2 == 0;
-}
-
-static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
- // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
- // Note that CTZ is undefined if all bits are 0, so we special-case it.
- return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
-}
-
-bool ColoringIteration::ColorInterferenceGraph() {
- DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
- ScopedArenaVector<LiveInterval*> colored_intervals(
- allocator_->Adapter(kArenaAllocRegisterAllocator));
- bool successful = true;
-
- while (!pruned_nodes_.empty()) {
- InterferenceNode* node = pruned_nodes_.top();
- pruned_nodes_.pop();
- LiveInterval* interval = node->GetInterval();
- size_t reg = 0;
-
- InterferenceNode* alias = node->GetAlias();
- if (alias != node) {
- // This node was coalesced with another.
- LiveInterval* alias_interval = alias->GetInterval();
- if (alias_interval->HasRegister()) {
- reg = alias_interval->GetRegister();
- DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
- << "This node conflicts with the register it was coalesced with";
- } else {
- DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
- << "Move coalescing was not conservative, causing a node to be coalesced "
- << "with another node that could not be colored";
- if (interval->RequiresRegister()) {
- successful = false;
- }
- }
- } else {
- // Search for free register(s).
- std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
- if (interval->HasHighInterval()) {
- // Note that the graph coloring allocator assumes that pair intervals are aligned here,
- // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
- // change the alignment requirements here, we will have to update the algorithm (e.g.,
- // be more conservative about the weight of edges adjacent to pair nodes.)
- while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
- reg += 2;
- }
-
- // Try to use a caller-save register first.
- for (size_t i = 0; i < num_regs_ - 1; i += 2) {
- bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
- bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
- if (!conflict_mask[i] && !conflict_mask[i + 1]) {
- if (low_caller_save && high_caller_save) {
- reg = i;
- break;
- } else if (low_caller_save || high_caller_save) {
- reg = i;
- // Keep looking to try to get both parts in caller-save registers.
- }
- }
- }
- } else {
- // Not a pair interval.
- reg = FindFirstZeroInConflictMask(conflict_mask);
-
- // Try to use caller-save registers first.
- for (size_t i = 0; i < num_regs_; ++i) {
- if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
- reg = i;
- break;
- }
- }
- }
-
- // Last-chance coalescing.
- for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kDefunct) {
- continue;
- }
- LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
- ? opportunity->node_b->GetAlias()->GetInterval()
- : opportunity->node_a->GetAlias()->GetInterval();
- if (other_interval->HasRegister()) {
- size_t coalesce_register = other_interval->GetRegister();
- if (interval->HasHighInterval()) {
- if (!conflict_mask[coalesce_register] &&
- !conflict_mask[coalesce_register + 1] &&
- RegisterIsAligned(coalesce_register)) {
- reg = coalesce_register;
- break;
- }
- } else if (!conflict_mask[coalesce_register]) {
- reg = coalesce_register;
- break;
- }
- }
- }
- }
-
- if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
- // Assign register.
- DCHECK(!interval->HasRegister());
- interval->SetRegister(reg);
- colored_intervals.push_back(interval);
- if (interval->HasHighInterval()) {
- DCHECK(!interval->GetHighInterval()->HasRegister());
- interval->GetHighInterval()->SetRegister(reg + 1);
- colored_intervals.push_back(interval->GetHighInterval());
- }
- } else if (interval->RequiresRegister()) {
- // The interference graph is too dense to color. Make it sparser by
- // splitting this live interval.
- successful = false;
- register_allocator_->SplitAtRegisterUses(interval);
- // We continue coloring, because there may be additional intervals that cannot
- // be colored, and that we should split.
- } else {
- // Spill.
- node->SetNeedsSpillSlot();
- }
- }
-
- // If unsuccessful, reset all register assignments.
- if (!successful) {
- for (LiveInterval* interval : colored_intervals) {
- interval->ClearRegister();
- }
- }
-
- return successful;
-}
-
-void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) {
- // The register allocation resolver will organize the stack based on value type,
- // so we assign stack slots for each value type separately.
- ScopedArenaAllocator allocator(allocator_->GetArenaStack());
- ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator);
- ScopedArenaVector<LiveInterval*> double_intervals(adapter);
- ScopedArenaVector<LiveInterval*> long_intervals(adapter);
- ScopedArenaVector<LiveInterval*> float_intervals(adapter);
- ScopedArenaVector<LiveInterval*> int_intervals(adapter);
-
- // The set of parent intervals already handled.
- ScopedArenaSet<LiveInterval*> seen(adapter);
-
- // Find nodes that need spill slots.
- for (InterferenceNode* node : nodes) {
- if (!node->NeedsSpillSlot()) {
- continue;
- }
-
- LiveInterval* parent = node->GetInterval()->GetParent();
- if (seen.find(parent) != seen.end()) {
- // We've already handled this interval.
- // This can happen if multiple siblings of the same interval request a stack slot.
- continue;
- }
- seen.insert(parent);
-
- HInstruction* defined_by = parent->GetDefinedBy();
- if (parent->HasSpillSlot()) {
- // We already have a spill slot for this value that we can reuse.
- } else if (defined_by->IsParameterValue()) {
- // Parameters already have a stack slot.
- parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
- } else if (defined_by->IsCurrentMethod()) {
- // The current method is always at stack slot 0.
- parent->SetSpillSlot(0);
- } else if (defined_by->IsConstant()) {
- // Constants don't need a spill slot.
- } else {
- // We need to find a spill slot for this interval. Place it in the correct
- // worklist to be processed later.
- switch (node->GetInterval()->GetType()) {
- case DataType::Type::kFloat64:
- double_intervals.push_back(parent);
- break;
- case DataType::Type::kInt64:
- long_intervals.push_back(parent);
- break;
- case DataType::Type::kFloat32:
- float_intervals.push_back(parent);
- break;
- case DataType::Type::kReference:
- case DataType::Type::kInt32:
- case DataType::Type::kUint16:
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- case DataType::Type::kBool:
- case DataType::Type::kInt16:
- int_intervals.push_back(parent);
- break;
- case DataType::Type::kUint32:
- case DataType::Type::kUint64:
- case DataType::Type::kVoid:
- LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
- UNREACHABLE();
- }
- }
- }
-
- // Color spill slots for each value type.
- ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_);
- ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_);
- ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_);
- ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_);
-}
-
-void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals,
- /* out */ size_t* num_stack_slots_used) {
- // We cannot use the original interference graph here because spill slots are assigned to
- // all of the siblings of an interval, whereas an interference node represents only a single
- // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
- // by position, and assigning the lowest spill slot available when we encounter an interval
- // beginning. We ignore lifetime holes for simplicity.
- ScopedArenaAllocator allocator(allocator_->GetArenaStack());
- ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
- allocator.Adapter(kArenaAllocRegisterAllocator));
-
- for (LiveInterval* parent_interval : intervals) {
- DCHECK(parent_interval->IsParent());
- DCHECK(!parent_interval->HasSpillSlot());
- size_t start = parent_interval->GetStart();
- size_t end = parent_interval->GetLastSibling()->GetEnd();
- DCHECK_LT(start, end);
- interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
- interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
- }
-
- // Sort by position.
- // We explicitly ignore the third entry of each tuple (the interval pointer) in order
- // to maintain determinism.
- std::sort(interval_endpoints.begin(), interval_endpoints.end(),
- [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
- const std::tuple<size_t, bool, LiveInterval*>& rhs) {
- return std::tie(std::get<0>(lhs), std::get<1>(lhs))
- < std::tie(std::get<0>(rhs), std::get<1>(rhs));
- });
-
- ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator);
- for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
- // Extract information from the current tuple.
- LiveInterval* parent_interval;
- bool is_interval_beginning;
- size_t position;
- std::tie(position, is_interval_beginning, parent_interval) = *it;
- size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
-
- if (is_interval_beginning) {
- DCHECK(!parent_interval->HasSpillSlot());
- DCHECK_EQ(position, parent_interval->GetStart());
-
- // Find first available free stack slot(s).
- size_t slot = 0;
- for (; ; ++slot) {
- bool found = true;
- for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
- if (taken.IsBitSet(s)) {
- found = false;
- break; // failure
- }
- }
- if (found) {
- break; // success
- }
- }
-
- parent_interval->SetSpillSlot(slot);
-
- *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
- if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
- // The parallel move resolver requires that there be an even number of spill slots
- // allocated for pair value types.
- ++(*num_stack_slots_used);
- }
-
- for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
- taken.SetBit(s);
- }
- } else {
- DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
- DCHECK(parent_interval->HasSpillSlot());
-
- // Free up the stack slot(s) used by this interval.
- size_t slot = parent_interval->GetSpillSlot();
- for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
- DCHECK(taken.IsBitSet(s));
- taken.ClearBit(s);
- }
- }
- }
- DCHECK_EQ(taken.NumSetBits(), 0u);
-}
-
-} // namespace art
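Aside on the deleted ColorSpillSlots() above: it assigns spill slots in a linear-scan style by sorting interval endpoints and handing out the lowest free run of slots at each interval start. A self-contained sketch of that idea follows (simplified Interval type, not ART's LiveInterval; the even-alignment adjustment for pair slots is omitted):

#include <algorithm>
#include <cstddef>
#include <tuple>
#include <vector>

struct Interval {
  size_t start;
  size_t end;        // End position of the last sibling.
  size_t num_slots;  // 1 for 32-bit values, 2 for 64-bit pairs.
  size_t slot = 0;
};

size_t AssignSpillSlots(const std::vector<Interval*>& intervals) {
  // (position, is_begin, interval): ends sort before begins at equal positions,
  // so a slot freed at position p can be reused by an interval starting at p.
  std::vector<std::tuple<size_t, bool, Interval*>> endpoints;
  for (Interval* it : intervals) {
    endpoints.emplace_back(it->start, true, it);
    endpoints.emplace_back(it->end, false, it);
  }
  std::sort(endpoints.begin(), endpoints.end(),
            [](const auto& a, const auto& b) {
              return std::tie(std::get<0>(a), std::get<1>(a)) <
                     std::tie(std::get<0>(b), std::get<1>(b));
            });

  std::vector<bool> taken;
  size_t max_used = 0;
  for (const auto& endpoint : endpoints) {
    Interval* it = std::get<2>(endpoint);
    if (std::get<1>(endpoint)) {  // Interval begins: grab the lowest free run.
      size_t slot = 0;
      while (true) {
        bool ok = true;
        for (size_t s = slot; s < slot + it->num_slots; ++s) {
          if (s < taken.size() && taken[s]) { ok = false; slot = s + 1; break; }
        }
        if (ok) break;
      }
      if (taken.size() < slot + it->num_slots) taken.resize(slot + it->num_slots);
      for (size_t s = slot; s < slot + it->num_slots; ++s) taken[s] = true;
      it->slot = slot;
      max_used = std::max(max_used, slot + it->num_slots);
    } else {  // Interval ends: release its slots.
      for (size_t s = it->slot; s < it->slot + it->num_slots; ++s) taken[s] = false;
    }
  }
  return max_used;  // Number of stack slots this value type needs in the frame.
}

The real allocator additionally rounds the slot count up to an even number for pair values, as required by the parallel move resolver; the sketch skips that step.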
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
deleted file mode 100644
index 0e10152049..0000000000
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
-#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
-
-#include "arch/instruction_set.h"
-#include "base/arena_object.h"
-#include "base/array_ref.h"
-#include "base/macros.h"
-#include "base/scoped_arena_containers.h"
-#include "register_allocator.h"
-
-namespace art HIDDEN {
-
-class CodeGenerator;
-class HBasicBlock;
-class HGraph;
-class HInstruction;
-class HParallelMove;
-class Location;
-class SsaLivenessAnalysis;
-class InterferenceNode;
-struct CoalesceOpportunity;
-enum class CoalesceKind;
-
-/**
- * A graph coloring register allocator.
- *
- * The algorithm proceeds as follows:
- * (1) Build an interference graph, where nodes represent live intervals, and edges represent
- * interferences between two intervals. Coloring this graph with k colors is isomorphic to
- * finding a valid register assignment with k registers.
- * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
- * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
- * different color.) As we prune nodes from the graph, more nodes may drop below degree k,
- * enabling further pruning. The key is to maintain the pruning order in a stack, so that we
- * can color the nodes in the reverse order.
- * When there are no more nodes with degree less than k, we start pruning alternate nodes based
- * on heuristics. Since these nodes are not guaranteed a color, we are careful to
- * prioritize nodes that require a register. We also prioritize short intervals, because
- * short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
- * a node amounts to pruning it later, since it will have fewer interferences if we prune other
- * nodes first.
- * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
- * a node a color, we do one of two things:
- * - If the node requires a register, we consider the current coloring attempt a failure.
- * However, we split the node's live interval in order to make the interference graph
- * sparser, so that future coloring attempts may succeed.
- * - If the node does not require a register, we simply assign it a location on the stack.
- *
- * If iterative move coalescing is enabled, the algorithm also attempts to conservatively
- * combine nodes in the graph that would prefer to have the same color. (For example, the output
- * of a phi instruction would prefer to have the same register as at least one of its inputs.)
- * There are several additional steps involved with this:
- * - We look for coalesce opportunities by examining each live interval, a step similar to that
- * used by linear scan when looking for register hints.
- * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist
- * of low degree nodes that have associated coalesce opportunities. Only when we run out of
- * coalesce opportunities do we start pruning coalesce-associated nodes.
- * - When pruning a node, if any nodes transition from high degree to low degree, we add
- * associated coalesce opportunities to the worklist, since these opportunities may now succeed.
- * - Whether two nodes can be combined is decided by two different heuristics--one used when
- * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
- * It is vital that we only combine two nodes if the node that remains is guaranteed to receive
- *    a color. This is because additional spilling is more costly than failing to coalesce.
- * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
- * to be used as last-chance register hints when coloring. If nothing else, we try to use
- * caller-save registers before callee-save registers.
- *
- * A good reference for graph coloring register allocation is
- * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
- */
-class RegisterAllocatorGraphColor : public RegisterAllocator {
- public:
- RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& analysis,
- bool iterative_move_coalescing = true);
- ~RegisterAllocatorGraphColor() override;
-
- void AllocateRegisters() override;
-
- bool Validate(bool log_fatal_on_failure) override;
-
- private:
- // Collect all intervals and prepare for register allocation.
- void ProcessInstructions();
- void ProcessInstruction(HInstruction* instruction);
-
- // If any inputs require specific registers, block those registers
- // at the position of this instruction.
- void CheckForFixedInputs(HInstruction* instruction);
-
- // If the output of an instruction requires a specific register, split
- // the interval and assign the register to the first part.
- void CheckForFixedOutput(HInstruction* instruction);
-
- // Add all applicable safepoints to a live interval.
- // Currently depends on instruction processing order.
- void AddSafepointsFor(HInstruction* instruction);
-
- // Collect all live intervals associated with the temporary locations
- // needed by an instruction.
- void CheckForTempLiveIntervals(HInstruction* instruction);
-
- // If a safe point is needed, add a synthesized interval to later record
- // the number of live registers at this point.
- void CheckForSafepoint(HInstruction* instruction);
-
- // Try to remove the SuspendCheck at function entry. Returns true if it was successful.
- bool TryRemoveSuspendCheckEntry(HInstruction* instruction);
-
- // Split an interval, but only if `position` is inside of `interval`.
- // Return either the new interval, or the original interval if not split.
- static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
-
- // To ensure every graph can be colored, split live intervals
- // at their register defs and uses. This creates short intervals with low
- // degree in the interference graph, which are prioritized during graph
- // coloring.
- void SplitAtRegisterUses(LiveInterval* interval);
-
- // If the given instruction is a catch phi, give it a spill slot.
- void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
-
- // Ensure that the given register cannot be allocated for a given range.
- void BlockRegister(Location location, size_t start, size_t end);
- void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
- bool IsCallerSave(size_t reg, bool processing_core_regs);
-
- // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
- // assigned the same stack slot.
- void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used);
-
- // Provide stack slots to nodes that need them.
- void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes);
-
- // Whether iterative move coalescing should be performed. Iterative move coalescing
- // improves code quality, but increases compile time.
- const bool iterative_move_coalescing_;
-
- // Live intervals, split by kind (core and floating point).
- // These should not contain high intervals, as those are represented by
- // the corresponding low interval throughout register allocation.
- ScopedArenaVector<LiveInterval*> core_intervals_;
- ScopedArenaVector<LiveInterval*> fp_intervals_;
-
- // Intervals for temporaries, saved for special handling in the resolution phase.
- ScopedArenaVector<LiveInterval*> temp_intervals_;
-
- // Safepoints, saved for special handling while processing instructions.
- ScopedArenaVector<HInstruction*> safepoints_;
-
- // Interference nodes representing specific registers. These are "pre-colored" nodes
- // in the interference graph.
- ScopedArenaVector<InterferenceNode*> physical_core_nodes_;
- ScopedArenaVector<InterferenceNode*> physical_fp_nodes_;
-
- // Allocated stack slot counters.
- size_t num_int_spill_slots_;
- size_t num_double_spill_slots_;
- size_t num_float_spill_slots_;
- size_t num_long_spill_slots_;
- size_t catch_phi_spill_slot_counter_;
-
- // Number of stack slots needed for the pointer to the current method.
- // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
- const size_t reserved_art_method_slots_;
-
- // Number of stack slots needed for outgoing arguments.
- const size_t reserved_out_slots_;
-
- friend class ColoringIteration;
-
- DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
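The deleted header comment above describes the classic simplify/select structure of graph-coloring allocation. A compact standalone sketch of that core loop on a plain adjacency-list graph (coalescing, interval splitting, and the worklists of the real allocator are left out):

#include <cstddef>
#include <optional>
#include <vector>

// Returns one color per node in [0, k), or nullopt for nodes left uncolored
// (a real allocator would spill or split those).
std::vector<std::optional<size_t>> ColorGraph(
    const std::vector<std::vector<size_t>>& adj, size_t k) {
  const size_t n = adj.size();
  std::vector<size_t> degree(n);
  for (size_t i = 0; i < n; ++i) degree[i] = adj[i].size();

  std::vector<bool> pruned(n, false);
  std::vector<size_t> stack;
  // Simplify: repeatedly prune a node of degree < k; if none remains, prune an
  // arbitrary node as a spill candidate (real allocators use heuristics here).
  for (size_t removed = 0; removed < n; ++removed) {
    size_t pick = n;
    for (size_t i = 0; i < n; ++i) {
      if (!pruned[i] && degree[i] < k) { pick = i; break; }
    }
    if (pick == n) {
      for (size_t i = 0; i < n; ++i) if (!pruned[i]) { pick = i; break; }
    }
    pruned[pick] = true;
    stack.push_back(pick);
    for (size_t nb : adj[pick]) if (!pruned[nb]) --degree[nb];
  }

  // Select: color nodes in the reverse order in which they were pruned.
  std::vector<std::optional<size_t>> color(n);
  while (!stack.empty()) {
    size_t node = stack.back();
    stack.pop_back();
    std::vector<bool> used(k, false);
    for (size_t nb : adj[node]) {
      if (color[nb]) used[*color[nb]] = true;
    }
    for (size_t c = 0; c < k; ++c) {
      if (!used[c]) { color[node] = c; break; }
    }
  }
  return color;
}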
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index fcdaa2d34f..ffa9937cc5 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1208,8 +1208,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
LiveInterval* interval = phi->GetLiveInterval();
HInstruction* previous_phi = phi->GetPrevious();
- DCHECK(previous_phi == nullptr ||
- previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+ DCHECK(previous_phi == nullptr || previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
<< "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
if (phi->IsVRegEquivalentOf(previous_phi)) {
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index d316aa5dc2..0d2d20682d 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -84,7 +84,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe
TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
test_name(Strategy::kRegisterAllocatorLinearScan);\
}\
-TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+/* Note: Graph coloring register allocator has been removed, so the test is DISABLED. */ \
+TEST_F(RegisterAllocatorTest, DISABLED_##test_name##_GraphColor) {\
test_name(Strategy::kRegisterAllocatorGraphColor);\
}
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 116f52605e..1cdc98a8be 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -490,9 +490,9 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
DCHECK(instruction != nullptr);
if (instruction->IsIf()) {
- condition = instruction->AsIf()->InputAt(0)->AsCondition();
+ condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull();
} else if (instruction->IsSelect()) {
- condition = instruction->AsSelect()->GetCondition()->AsCondition();
+ condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull();
}
SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
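This hunk, like several below, swaps As<T>() calls for As<T>OrNull() at sites that null-check the result. The exact ART contract is not visible in this diff; the naming suggests the plain accessor now asserts the dynamic type while the OrNull form returns nullptr on a mismatch. A hypothetical sketch of that split (HInstruction/HCondition here are stand-ins, not the real classes):

#include <cassert>

class HCondition;

class HInstruction {
 public:
  virtual ~HInstruction() = default;
  virtual bool IsCondition() const { return false; }
  HCondition* AsCondition();        // Checked cast: asserts the type.
  HCondition* AsConditionOrNull();  // Query cast: nullptr on mismatch.
};

class HCondition : public HInstruction {
 public:
  bool IsCondition() const override { return true; }
};

inline HCondition* HInstruction::AsCondition() {
  assert(IsCondition());
  return static_cast<HCondition*>(this);
}

inline HCondition* HInstruction::AsConditionOrNull() {
  return IsCondition() ? static_cast<HCondition*>(this) : nullptr;
}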
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 3f931c4c49..53ad2b12c0 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -610,7 +610,7 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte
}
}
-void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_internal_latency_ = kArmNopLatency;
@@ -618,11 +618,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress*
}
void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
- HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HIntermediateAddressIndex*) {
UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}
-void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) {
last_visited_latency_ = kArmMulIntegerLatency;
}
@@ -806,7 +806,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
}
}
-void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
// Users do not use any data results.
last_visited_latency_ = 0;
@@ -866,22 +866,22 @@ void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr
HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
-void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
-void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) {
last_visited_internal_latency_ = kArmLoadStringInternalLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) {
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
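The scheduler hunks above and below are mechanical: each ATTRIBUTE_UNUSED macro on an unused parameter becomes the standard C++17 [[maybe_unused]] attribute placed before the parameter. A minimal illustration (placeholder types, not ART code):

struct LatencyVisitor {
  virtual ~LatencyVisitor() = default;
  // Before: void Visit(int* instruction ATTRIBUTE_UNUSED) { latency_ = 1; }
  // After: the standard attribute suppresses the unused-parameter warning.
  virtual void Visit([[maybe_unused]] int* instruction) { latency_ = 1; }
  int latency_ = 0;
};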
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index 0da21c187f..cedc12a2be 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -53,7 +53,7 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
: codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
// Default visitor for instructions not handled specifically below.
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
+ void VisitInstruction([[maybe_unused]] HInstruction*) override {
last_visited_latency_ = kArmIntegerOpLatency;
}
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 3071afd951..5113cf446d 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -30,30 +30,30 @@ void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr
}
void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight(
- HBitwiseNegatedRight* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HBitwiseNegatedRight*) {
last_visited_latency_ = kArm64IntegerOpLatency;
}
void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp(
- HDataProcWithShifterOp* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDataProcWithShifterOp*) {
last_visited_latency_ = kArm64DataProcWithShifterOpLatency;
}
void SchedulingLatencyVisitorARM64::VisitIntermediateAddress(
- HIntermediateAddress* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HIntermediateAddress*) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_latency_ = kArm64IntegerOpLatency + 2;
}
void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex(
- HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HIntermediateAddressIndex* instr) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2;
}
-void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) {
last_visited_latency_ = kArm64MulIntegerLatency;
}
@@ -65,15 +65,15 @@ void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitArrayLength([[maybe_unused]] HArrayLength*) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitArraySet([[maybe_unused]] HArraySet*) {
last_visited_latency_ = kArm64MemoryStoreLatency;
}
-void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) {
last_visited_internal_latency_ = kArm64IntegerOpLatency;
// Users do not use any data results.
last_visited_latency_ = 0;
@@ -113,21 +113,21 @@ void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) {
}
}
-void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet([[maybe_unused]] HInstanceFieldGet*) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitInstanceOf([[maybe_unused]] HInstanceOf*) {
last_visited_internal_latency_ = kArm64CallInternalLatency;
last_visited_latency_ = kArm64IntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitInvoke([[maybe_unused]] HInvoke*) {
last_visited_internal_latency_ = kArm64CallInternalLatency;
last_visited_latency_ = kArm64CallLatency;
}
-void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitLoadString([[maybe_unused]] HLoadString*) {
last_visited_internal_latency_ = kArm64LoadStringInternalLatency;
last_visited_latency_ = kArm64MemoryLoadLatency;
}
@@ -138,7 +138,7 @@ void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) {
: kArm64MulIntegerLatency;
}
-void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitNewArray([[maybe_unused]] HNewArray*) {
last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency;
last_visited_latency_ = kArm64CallLatency;
}
@@ -181,7 +181,7 @@ void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) {
}
}
-void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitStaticFieldGet([[maybe_unused]] HStaticFieldGet*) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
@@ -211,7 +211,7 @@ void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *in
}
void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
- HVecReplicateScalar* instr ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HVecReplicateScalar* instr) {
last_visited_latency_ = kArm64SIMDReplicateOpLatency;
}
@@ -223,7 +223,7 @@ void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) {
HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecCnv([[maybe_unused]] HVecCnv* instr) {
last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency;
}
@@ -279,19 +279,19 @@ void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) {
HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecAnd([[maybe_unused]] HVecAnd* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecAndNot([[maybe_unused]] HVecAndNot* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecOr([[maybe_unused]] HVecOr* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecXor([[maybe_unused]] HVecXor* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
@@ -312,13 +312,12 @@ void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
}
void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
- HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HVecMultiplyAccumulate* instr) {
last_visited_latency_ = kArm64SIMDMulIntegerLatency;
}
-void SchedulingLatencyVisitorARM64::HandleVecAddress(
- HVecMemoryOperation* instruction,
- size_t size ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction,
+ [[maybe_unused]] size_t size) {
HInstruction* index = instruction->InputAt(1);
if (!index->IsConstant()) {
last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency;
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index ec41577e9d..7ce00e00ab 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -59,7 +59,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
public:
// Default visitor for instructions not handled specifically below.
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
+ void VisitInstruction([[maybe_unused]] HInstruction*) override {
last_visited_latency_ = kArm64IntegerOpLatency;
}
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 6a10440d11..07065efbb7 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -46,8 +46,7 @@ static bool IsSimpleBlock(HBasicBlock* block) {
} else if (instruction->CanBeMoved() &&
!instruction->HasSideEffects() &&
!instruction->CanThrow()) {
- if (instruction->IsSelect() &&
- instruction->AsSelect()->GetCondition()->GetBlock() == block) {
+ if (instruction->IsSelect() && instruction->AsSelect()->GetCondition()->GetBlock() == block) {
// Count one HCondition and HSelect in the same block as a single instruction.
// This enables finding nested selects.
continue;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index a658252e69..2179bf50b5 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -604,7 +604,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
*/
HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) {
// We place the floating point constant next to this constant.
- HFloatConstant* result = constant->GetNext()->AsFloatConstant();
+ HFloatConstant* result = constant->GetNext()->AsFloatConstantOrNull();
if (result == nullptr) {
float value = bit_cast<float, int32_t>(constant->GetValue());
result = new (graph_->GetAllocator()) HFloatConstant(value);
@@ -626,7 +626,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) {
*/
HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) {
// We place the floating point constant next to this constant.
- HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
+ HDoubleConstant* result = constant->GetNext()->AsDoubleConstantOrNull();
if (result == nullptr) {
double value = bit_cast<double, int64_t>(constant->GetValue());
result = new (graph_->GetAllocator()) HDoubleConstant(value);
@@ -652,16 +652,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType::
// We place the floating point /reference phi next to this phi.
HInstruction* next = phi->GetNext();
- if (next != nullptr
- && next->AsPhi()->GetRegNumber() == phi->GetRegNumber()
- && next->GetType() != type) {
+ if (next != nullptr &&
+ next->AsPhi()->GetRegNumber() == phi->GetRegNumber() &&
+ next->GetType() != type) {
// Move to the next phi to see if it is the one we are looking for.
next = next->GetNext();
}
- if (next == nullptr
- || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
- || (next->GetType() != type)) {
+ if (next == nullptr ||
+ (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) ||
+ (next->GetType() != type)) {
ArenaAllocator* allocator = graph_->GetAllocator();
HInputsRef inputs = phi->GetInputs();
HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type);
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index 2df0f34c7d..18c945381d 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -31,6 +31,7 @@ namespace art HIDDEN {
class SsaLivenessAnalysisTest : public OptimizingUnitTest {
protected:
void SetUp() override {
+ TEST_SETUP_DISABLED_FOR_RISCV64();
OptimizingUnitTest::SetUp();
graph_ = CreateGraph();
compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default");
@@ -42,6 +43,11 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest {
graph_->SetEntryBlock(entry_);
}
+ void TearDown() override {
+ TEST_TEARDOWN_DISABLED_FOR_RISCV64();
+ OptimizingUnitTest::TearDown();
+ }
+
protected:
HBasicBlock* CreateSuccessor(HBasicBlock* block) {
HGraph* graph = block->GetGraph();
@@ -58,6 +64,7 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest {
};
TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
+ TEST_DISABLED_FOR_RISCV64();
HInstruction* arg = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32);
entry_->AddInstruction(arg);
@@ -78,6 +85,7 @@ TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
}
TEST_F(SsaLivenessAnalysisTest, TestAput) {
+ TEST_DISABLED_FOR_RISCV64();
HInstruction* array = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference);
HInstruction* index = new (GetAllocator()) HParameterValue(
@@ -147,6 +155,7 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) {
}
TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
+ TEST_DISABLED_FOR_RISCV64();
HInstruction* array = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference);
HInstruction* index = new (GetAllocator()) HParameterValue(
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index ce343dffec..1d9be3956a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -76,7 +76,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
HPhi* phi = worklist.back();
worklist.pop_back();
for (HInstruction* raw_input : phi->GetInputs()) {
- HPhi* input = raw_input->AsPhi();
+ HPhi* input = raw_input->AsPhiOrNull();
if (input != nullptr && input->IsDead()) {
// Input is a dead phi. Revive it and add to the worklist. We make sure
// that the phi was not dead initially (see definition of `initially_live`).
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1a368ed347..2ecda7610e 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -51,7 +51,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
size_t fp_spill_mask,
uint32_t num_dex_registers,
bool baseline,
- bool debuggable) {
+ bool debuggable,
+ bool has_should_deoptimize_flag) {
DCHECK(!in_method_) << "Mismatched Begin/End calls";
in_method_ = true;
DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -63,6 +64,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
num_dex_registers_ = num_dex_registers;
baseline_ = baseline;
debuggable_ = debuggable;
+ has_should_deoptimize_flag_ = has_should_deoptimize_flag;
if (kVerifyStackMaps) {
dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -152,8 +154,10 @@ void StackMapStream::BeginStackMapEntry(
// Create lambda method, which will be executed at the very end to verify data.
// Parameters and local variables will be captured(stored) by the lambda "[=]".
dchecks_.emplace_back([=](const CodeInfo& code_info) {
+ // The `native_pc_offset` may have been overridden using `SetStackMapNativePcOffset(.)`.
+ uint32_t final_native_pc_offset = GetStackMapNativePcOffset(stack_map_index);
if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) {
- StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset,
+ StackMap stack_map = code_info.GetStackMapForNativePcOffset(final_native_pc_offset,
instruction_set_);
CHECK_EQ(stack_map.Row(), stack_map_index);
} else if (kind == StackMap::Kind::Catch) {
@@ -162,7 +166,7 @@ void StackMapStream::BeginStackMapEntry(
CHECK_EQ(stack_map.Row(), stack_map_index);
}
StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
- CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset);
+ CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), final_native_pc_offset);
CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind));
CHECK_EQ(stack_map.GetDexPc(), dex_pc);
CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask);
@@ -374,10 +378,12 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls";
DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
- uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
+ uint32_t flags = 0;
+ flags |= (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0;
- DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte.
+ flags |= has_should_deoptimize_flag_ ? CodeInfo::kHasShouldDeoptimizeFlag : 0;
+
uint32_t bit_table_flags = 0;
ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
if (bit_table->size() != 0) { // Record which bit-tables are stored.
@@ -409,6 +415,8 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
CHECK_EQ(CodeInfo::HasInlineInfo(buffer.data()), inline_infos_.size() > 0);
CHECK_EQ(CodeInfo::IsBaseline(buffer.data()), baseline_);
+ CHECK_EQ(CodeInfo::IsDebuggable(buffer.data()), debuggable_);
+ CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(buffer.data()), has_should_deoptimize_flag_);
// Verify all written data (usually only in debug builds).
if (kVerifyStackMaps) {
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 643af2da94..f027850ce6 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -66,7 +66,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
size_t fp_spill_mask,
uint32_t num_dex_registers,
bool baseline,
- bool debuggable);
+ bool debuggable,
+ bool has_should_deoptimize_flag = false);
void EndMethod(size_t code_size);
void BeginStackMapEntry(
@@ -129,8 +130,9 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
uint32_t core_spill_mask_ = 0;
uint32_t fp_spill_mask_ = 0;
uint32_t num_dex_registers_ = 0;
- bool baseline_;
- bool debuggable_;
+ bool baseline_ = false;
+ bool debuggable_ = false;
+ bool has_should_deoptimize_flag_ = false;
BitTableBuilder<StackMap> stack_maps_;
BitTableBuilder<RegisterMask> register_masks_;
BitmapTableBuilder stack_masks_;
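The StackMapStream changes thread a new has_should_deoptimize_flag bit into the encoded CodeInfo flags alongside the inline-info, baseline, and debuggable bits. As a rough standalone illustration of that kind of flag packing (illustrative bit values only, not ART's actual CodeInfo layout):

#include <cstdint>

// Hypothetical flag bits mirroring the four conditions OR-ed together in
// StackMapStream::Encode() above.
enum CodeInfoFlags : uint32_t {
  kHasInlineInfo = 1u << 0,
  kIsBaseline = 1u << 1,
  kIsDebuggable = 1u << 2,
  kHasShouldDeoptimizeFlag = 1u << 3,
};

uint32_t EncodeFlags(bool has_inline_info, bool baseline, bool debuggable,
                     bool has_should_deoptimize_flag) {
  uint32_t flags = 0;
  flags |= has_inline_info ? kHasInlineInfo : 0u;
  flags |= baseline ? kIsBaseline : 0u;
  flags |= debuggable ? kIsDebuggable : 0u;
  flags |= has_should_deoptimize_flag ? kHasShouldDeoptimizeFlag : 0u;
  return flags;
}

bool HasShouldDeoptimizeFlag(uint32_t flags) {
  return (flags & kHasShouldDeoptimizeFlag) != 0;
}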
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index e266618980..d86869ce0f 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -33,7 +33,7 @@ class MemoryOperandVisitor final : public HGraphVisitor {
private:
void VisitBoundsCheck(HBoundsCheck* check) override {
// Replace the length by the array itself, so that we can do compares to memory.
- HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+ HArrayLength* array_len = check->InputAt(1)->AsArrayLengthOrNull();
// We only want to replace an ArrayLength.
if (array_len == nullptr) {
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index a122d3c9d3..d9f56629ef 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -28,6 +28,10 @@
#include "utils/arm64/assembler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "utils/riscv64/assembler_riscv64.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
#include "utils/x86/assembler_x86.h"
#endif
@@ -57,9 +61,6 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
ArmVIXLAssembler assembler(allocator);
switch (abi) {
- case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI.
- ___ Ldr(pc, MemOperand(r0, offset.Int32Value()));
- break;
case kJniAbi: { // Load via Thread* held in JNIEnv* in first argument (R0).
vixl::aarch32::UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
const vixl::aarch32::Register temp_reg = temps.Acquire();
@@ -78,7 +79,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
size_t cs = __ CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(entry_stub->data(), entry_stub->size());
- __ FinalizeInstructions(code);
+ __ CopyInstructions(code);
return std::move(entry_stub);
}
@@ -95,11 +96,6 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
Arm64Assembler assembler(allocator);
switch (abi) {
- case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI.
- __ JumpTo(Arm64ManagedRegister::FromXRegister(X0), Offset(offset.Int32Value()),
- Arm64ManagedRegister::FromXRegister(IP1));
-
- break;
case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (X0).
__ LoadRawPtr(Arm64ManagedRegister::FromXRegister(IP1),
Arm64ManagedRegister::FromXRegister(X0),
@@ -120,13 +116,47 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
size_t cs = __ CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(entry_stub->data(), entry_stub->size());
- __ FinalizeInstructions(code);
+ __ CopyInstructions(code);
return std::move(entry_stub);
}
} // namespace arm64
#endif // ART_ENABLE_CODEGEN_arm64
+#ifdef ART_ENABLE_CODEGEN_riscv64
+namespace riscv64 {
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* allocator,
+ EntryPointCallingConvention abi,
+ ThreadOffset64 offset) {
+ Riscv64Assembler assembler(allocator);
+ ScratchRegisterScope srs(&assembler);
+ XRegister tmp = srs.AllocateXRegister();
+
+ switch (abi) {
+ case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (A0).
+ __ Loadd(tmp,
+ A0,
+ JNIEnvExt::SelfOffset(static_cast<size_t>(kRiscv64PointerSize)).Int32Value());
+ __ Loadd(tmp, tmp, offset.Int32Value());
+ __ Jr(tmp);
+ break;
+ case kQuickAbi: // TR holds Thread*.
+ __ Loadd(tmp, TR, offset.Int32Value());
+ __ Jr(tmp);
+ break;
+ }
+
+ __ FinalizeCode();
+ size_t cs = __ CodeSize();
+ std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
+ MemoryRegion code(entry_stub->data(), entry_stub->size());
+ __ CopyInstructions(code);
+
+ return std::move(entry_stub);
+}
+} // namespace riscv64
+#endif // ART_ENABLE_CODEGEN_riscv64
+
#ifdef ART_ENABLE_CODEGEN_x86
namespace x86 {
static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* allocator,
@@ -141,7 +171,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat
size_t cs = __ CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(entry_stub->data(), entry_stub->size());
- __ FinalizeInstructions(code);
+ __ CopyInstructions(code);
return std::move(entry_stub);
}
@@ -162,7 +192,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat
size_t cs = __ CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(entry_stub->data(), entry_stub->size());
- __ FinalizeInstructions(code);
+ __ CopyInstructions(code);
return std::move(entry_stub);
}
@@ -179,6 +209,10 @@ std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet is
case InstructionSet::kArm64:
return arm64::CreateTrampoline(&allocator, abi, offset);
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case InstructionSet::kRiscv64:
+ return riscv64::CreateTrampoline(&allocator, abi, offset);
+#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case InstructionSet::kX86_64:
return x86_64::CreateTrampoline(&allocator, offset);
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 32e35ae1d6..25b715fab0 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -28,8 +28,6 @@
namespace art HIDDEN {
enum EntryPointCallingConvention {
- // ABI of invocations to a method's interpreter entry point.
- kInterpreterAbi,
// ABI of calls to a method's native code, only used for native methods.
kJniAbi,
// ABI of calls to a method's quick code entry point.
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index c7ca003530..d64de09501 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -52,7 +52,7 @@ const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}
-void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
+void ArmVIXLAssembler::CopyInstructions(const MemoryRegion& region) {
// Copy the instructions from the buffer.
MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
region.CopyFrom(0, from);
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 741119d7f7..50dc06fefc 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -173,6 +173,30 @@ class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler {
}
}
using MacroAssembler::Vmov;
+
+ // TODO(b/281982421): Move the implementation of Mrrc to vixl and remove this implementation.
+ void Mrrc(vixl32::Register r1, vixl32::Register r2, int coproc, int opc1, int crm) {
+ // See ARM A-profile A32/T32 Instruction set architecture
+ // https://developer.arm.com/documentation/ddi0597/2022-09/Base-Instructions/MRRC--Move-to-two-general-purpose-registers-from-System-register-
+ CHECK(coproc == 15 || coproc == 14);
+ if (IsUsingT32()) {
+ uint32_t inst = (0b111011000101 << 20) |
+ (r2.GetCode() << 16) |
+ (r1.GetCode() << 12) |
+ (coproc << 8) |
+ (opc1 << 4) |
+ crm;
+ EmitT32_32(inst);
+ } else {
+ uint32_t inst = (0b000011000101 << 20) |
+ (r2.GetCode() << 16) |
+ (r1.GetCode() << 12) |
+ (coproc << 8) |
+ (opc1 << 4) |
+ crm;
+ EmitA32(inst);
+ }
+ }
};
class ArmVIXLAssembler final : public Assembler {
@@ -194,12 +218,12 @@ class ArmVIXLAssembler final : public Assembler {
const uint8_t* CodeBufferBaseAddress() const override;
// Copy instructions out of assembly buffer into the given region of memory.
- void FinalizeInstructions(const MemoryRegion& region) override;
+ void CopyInstructions(const MemoryRegion& region) override;
- void Bind(Label* label ATTRIBUTE_UNUSED) override {
+ void Bind([[maybe_unused]] Label* label) override {
UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM";
}
- void Jump(Label* label ATTRIBUTE_UNUSED) override {
+ void Jump([[maybe_unused]] Label* label) override {
UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM";
}
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 54873454eb..7a887fa064 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -344,13 +344,13 @@ void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs
}
}
-void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg,
+ [[maybe_unused]] size_t size) {
UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
}
-void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg,
+ [[maybe_unused]] size_t size) {
UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
}
@@ -720,7 +720,7 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst,
ManagedRegister msrc,
- size_t size ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] size_t size) {
ArmManagedRegister dst = mdst.AsArm();
if (kIsDebugBuild) {
// Check that the destination is not a scratch register.
@@ -861,13 +861,13 @@ void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister
___ Ldr(reg, MemOperand(reg));
}
-void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
- bool could_be_null ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister src,
+ [[maybe_unused]] bool could_be_null) {
// TODO: not validating references.
}
-void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
- bool could_be_null ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src,
+ [[maybe_unused]] bool could_be_null) {
// TODO: not validating references.
}
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 26dce7c502..13acc7c852 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -79,7 +79,7 @@ const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}
-void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) {
+void Arm64Assembler::CopyInstructions(const MemoryRegion& region) {
// Copy the instructions from the buffer.
MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
region.CopyFrom(0, from);
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index f8168903bd..ad6a8edadf 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -91,7 +91,7 @@ class Arm64Assembler final : public Assembler {
const uint8_t* CodeBufferBaseAddress() const override;
// Copy instructions out of assembly buffer into the given region of memory.
- void FinalizeInstructions(const MemoryRegion& region) override;
+ void CopyInstructions(const MemoryRegion& region) override;
void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs);
@@ -145,10 +145,10 @@ class Arm64Assembler final : public Assembler {
// MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction.
void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0);
- void Bind(Label* label ATTRIBUTE_UNUSED) override {
+ void Bind([[maybe_unused]] Label* label) override {
UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM64";
}
- void Jump(Label* label ATTRIBUTE_UNUSED) override {
+ void Jump([[maybe_unused]] Label* label) override {
UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM64";
}
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 9e9f122cf6..c5380695d9 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -705,7 +705,7 @@ void Arm64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m
}
void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
- JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) {
+ JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) {
constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>();
@@ -734,8 +734,8 @@ void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
void Arm64JNIMacroAssembler::TryToTransitionFromNativeToRunnable(
JNIMacroLabel* label,
- ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED,
- ManagedRegister return_reg ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs,
+ [[maybe_unused]] ManagedRegister return_reg) {
constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>();
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index b82f0dc4b4..1c04a3d20b 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -57,18 +57,21 @@ void AssemblerBuffer::ProcessFixups(const MemoryRegion& region) {
fixup->Process(region, fixup->position());
fixup = fixup->previous();
}
+#ifndef NDEBUG
+ fixups_processed_ = true;
+#endif
+}
+
+
+void AssemblerBuffer::ProcessFixups() {
+ MemoryRegion from(reinterpret_cast<void*>(contents()), Size());
+ ProcessFixups(from);
}
-void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) {
- // Copy the instructions from the buffer.
+void AssemblerBuffer::CopyInstructions(const MemoryRegion& instructions) {
MemoryRegion from(reinterpret_cast<void*>(contents()), Size());
instructions.CopyFrom(0, from);
- // Process fixups in the instructions.
- ProcessFixups(instructions);
-#ifndef NDEBUG
- fixups_processed_ = true;
-#endif
}
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 13a5d9fd01..f3fa711dbb 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -163,9 +163,8 @@ class AssemblerBuffer {
uint8_t* contents() const { return contents_; }
- // Copy the assembled instructions into the specified memory block
- // and apply all fixups.
- void FinalizeInstructions(const MemoryRegion& region);
+ // Copy the assembled instructions into the specified memory block.
+ void CopyInstructions(const MemoryRegion& region);
// To emit an instruction to the assembler buffer, the EnsureCapacity helper
// must be used to guarantee that the underlying data area is big enough to
@@ -246,6 +245,8 @@ class AssemblerBuffer {
// The provided `min_capacity` must be higher than current `Capacity()`.
void ExtendCapacity(size_t min_capacity);
+ void ProcessFixups();
+
private:
// The limit is set to kMinimumGap bytes before the end of the data area.
// This leaves enough space for the longest possible instruction and allows
@@ -357,7 +358,10 @@ class DebugFrameOpCodeWriterForAssembler final
class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
public:
// Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
- virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
+ virtual void FinalizeCode() {
+ buffer_.EmitSlowPaths(this);
+ buffer_.ProcessFixups();
+ }
// Size of generated code
virtual size_t CodeSize() const { return buffer_.Size(); }
@@ -375,12 +379,12 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
virtual size_t CodePosition() { return CodeSize(); }
// Copy instructions out of assembly buffer into the given region of memory
- virtual void FinalizeInstructions(const MemoryRegion& region) {
- buffer_.FinalizeInstructions(region);
+ virtual void CopyInstructions(const MemoryRegion& region) {
+ buffer_.CopyInstructions(region);
}
// TODO: Implement with disassembler.
- virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
+ virtual void Comment([[maybe_unused]] const char* format, ...) {}
virtual void Bind(Label* label) = 0;
virtual void Jump(Label* label) = 0;
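With fixup processing moved out of the copy step, `FinalizeCode()` now both emits slow paths and applies fixups in place, while `CopyInstructions()` is a plain copy. A minimal sketch of the resulting caller sequence, mirroring the `DriverWrapper()` test helper updated later in this change (the buffer name is illustrative):

// 1. Finalize: emit slow paths, then apply fixups inside the assembler buffer.
assembler->FinalizeCode();
// 2. Allocate the destination and copy the already-fixed-up instructions.
std::vector<uint8_t> code(assembler->CodeSize());
MemoryRegion region(code.data(), code.size());
assembler->CopyInstructions(region);  // No fixup processing happens here anymore.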
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index d03e5a7abc..72f48367a6 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -26,6 +26,7 @@
#include <fstream>
#include <iterator>
+#include "base/array_ref.h"
#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "assembler_test_base.h"
@@ -200,8 +201,8 @@ class AssemblerTest : public AssemblerTestBase {
template <typename Reg1, typename Reg2, typename ImmType>
std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType),
int imm_bits,
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
const std::string& fmt,
@@ -215,48 +216,28 @@ class AssemblerTest : public AssemblerTestBase {
for (int64_t imm : imms) {
ImmType new_imm = CreateImmediate(imm);
if (f != nullptr) {
- (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias);
+ (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
-
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceImm(imm, bias, multiplier, &base);
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm * multiplier + bias;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
-
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename Reg1, typename Reg2, typename Reg3, typename ImmType>
std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, Reg3, ImmType),
int imm_bits,
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
- const std::vector<Reg3*> reg3_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
+ ArrayRef<const Reg3> reg3_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
std::string (AssemblerTest::*GetName3)(const Reg3&),
@@ -271,53 +252,28 @@ class AssemblerTest : public AssemblerTestBase {
for (int64_t imm : imms) {
ImmType new_imm = CreateImmediate(imm);
if (f != nullptr) {
- (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias);
+ (assembler_.get()->*f)(reg1, reg2, reg3, new_imm + bias);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
-
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
-
- std::string reg3_string = (this->*GetName3)(*reg3);
- size_t reg3_index;
- while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) {
- base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string);
- }
-
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm + bias;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base);
+ ReplaceImm(imm, bias, /*multiplier=*/ 1, &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename ImmType, typename Reg1, typename Reg2>
std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2),
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
int imm_bits,
@@ -332,46 +288,26 @@ class AssemblerTest : public AssemblerTestBase {
for (int64_t imm : imms) {
ImmType new_imm = CreateImmediate(imm);
if (f != nullptr) {
- (assembler_.get()->*f)(new_imm, *reg1, *reg2);
+ (assembler_.get()->*f)(new_imm, reg1, reg2);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
-
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
-
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename RegType, typename ImmType>
std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
int imm_bits,
- const std::vector<RegType*> registers,
+ ArrayRef<const RegType> registers,
std::string (AssemblerTest::*GetName)(const RegType&),
const std::string& fmt,
int bias) {
@@ -382,36 +318,148 @@ class AssemblerTest : public AssemblerTestBase {
for (int64_t imm : imms) {
ImmType new_imm = CreateImmediate(imm);
if (f != nullptr) {
- (assembler_.get()->*f)(*reg, new_imm + bias);
+ (assembler_.get()->*f)(reg, new_imm + bias);
}
std::string base = fmt;
- std::string reg_string = (this->*GetName)(*reg);
- size_t reg_index;
- while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
- base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
- }
+ ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base);
+ ReplaceImm(imm, bias, /*multiplier=*/ 1, &base);
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm + bias;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+ str += base;
+ str += "\n";
+ }
+ }
+ return str;
+ }
+
+ template <typename RegType, typename ImmType>
+ std::string RepeatTemplatedRegisterImmBitsShift(
+ void (Ass::*f)(RegType, ImmType),
+ int imm_bits,
+ int shift,
+ ArrayRef<const RegType> registers,
+ std::string (AssemblerTest::*GetName)(const RegType&),
+ const std::string& fmt,
+ int bias) {
+ std::string str;
+ std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift);
+
+ for (auto reg : registers) {
+ for (int64_t imm : imms) {
+ ImmType new_imm = CreateImmediate(imm);
+ if (f != nullptr) {
+ (assembler_.get()->*f)(reg, new_imm + bias);
}
+ std::string base = fmt;
+
+ ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base);
+ ReplaceImm(imm, bias, /*multiplier=*/ 1, &base);
+
+ str += base;
+ str += "\n";
+ }
+ }
+ return str;
+ }
+
+ template <typename ImmType>
+ std::string RepeatTemplatedImmBitsShift(
+ void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) {
+ std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift);
+
+ WarnOnCombinations(imms.size());
+
+ std::string str;
- if (str.size() > 0) {
+ for (int64_t imm : imms) {
+ ImmType new_imm = CreateImmediate(imm);
+ if (f != nullptr) {
+ (assembler_.get()->*f)(new_imm + bias);
+ }
+ std::string base = fmt;
+
+ ReplaceImm(imm, bias, /*multiplier=*/ 1, &base);
+
+ str += base;
+ str += "\n";
+ }
+ return str;
+ }
+
+ template <typename Reg1, typename Reg2, typename ImmType>
+ std::string RepeatTemplatedRegistersImmBitsShift(
+ void (Ass::*f)(Reg1, Reg2, ImmType),
+ int imm_bits,
+ int shift,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
+ std::string (AssemblerTest::*GetName1)(const Reg1&),
+ std::string (AssemblerTest::*GetName2)(const Reg2&),
+ const std::string& fmt,
+ int bias = 0,
+ int multiplier = 1) {
+ std::string str;
+ std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift);
+
+ for (auto reg1 : reg1_registers) {
+ for (auto reg2 : reg2_registers) {
+ for (int64_t imm : imms) {
+ ImmType new_imm = CreateImmediate(imm);
+ if (f != nullptr) {
+ (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias);
+ }
+ std::string base = fmt;
+
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceImm(imm, bias, multiplier, &base);
+
+ str += base;
str += "\n";
}
- str += base;
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename ImmType>
+ std::string RepeatIbS(
+ void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) {
+ return RepeatTemplatedImmBitsShift<ImmType>(f, imm_bits, shift, fmt, bias);
+ }
+
+ template <typename ImmType>
+ std::string RepeatRIbS(
+ void (Ass::*f)(Reg, ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) {
+ return RepeatTemplatedRegisterImmBitsShift<Reg, ImmType>(
+ f,
+ imm_bits,
+ shift,
+ GetRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+ fmt,
+ bias);
+ }
+
+ template <typename ImmType>
+ std::string RepeatRRIbS(void (Ass::*f)(Reg, Reg, ImmType),
+ int imm_bits,
+ int shift,
+ const std::string& fmt,
+ int bias = 0) {
+ return RepeatTemplatedRegistersImmBitsShift<Reg, Reg, ImmType>(
+ f,
+ imm_bits,
+ shift,
+ GetRegisters(),
+ GetRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+ &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+ fmt,
+ bias);
+ }
+
+ template <typename ImmType>
std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType),
int imm_bits,
const std::string& fmt,
@@ -488,6 +536,19 @@ class AssemblerTest : public AssemblerTestBase {
fmt);
}
+ std::string RepeatFFFF(void (Ass::*f)(FPReg, FPReg, FPReg, FPReg), const std::string& fmt) {
+ return RepeatTemplatedRegisters<FPReg, FPReg, FPReg, FPReg>(f,
+ GetFPRegisters(),
+ GetFPRegisters(),
+ GetFPRegisters(),
+ GetFPRegisters(),
+ &AssemblerTest::GetFPRegName,
+ &AssemblerTest::GetFPRegName,
+ &AssemblerTest::GetFPRegName,
+ &AssemblerTest::GetFPRegName,
+ fmt);
+ }
+
std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) {
return RepeatTemplatedRegisters<FPReg, FPReg, Reg>(
f,
@@ -538,6 +599,32 @@ class AssemblerTest : public AssemblerTestBase {
fmt);
}
+ std::string RepeatRFF(void (Ass::*f)(Reg, FPReg, FPReg), const std::string& fmt) {
+ return RepeatTemplatedRegisters<Reg, FPReg, FPReg>(
+ f,
+ GetRegisters(),
+ GetFPRegisters(),
+ GetFPRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+ &AssemblerTest::GetFPRegName,
+ &AssemblerTest::GetFPRegName,
+ fmt);
+ }
+
+ template <typename ImmType>
+ std::string RepeatRFIb(void (Ass::*f)(Reg, FPReg, ImmType),
+ int imm_bits,
+ const std::string& fmt) {
+ return RepeatTemplatedRegistersImmBits<Reg, FPReg, ImmType>(
+ f,
+ imm_bits,
+ GetRegisters(),
+ GetFPRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+ &AssemblerTest::GetFPRegName,
+ fmt);
+ }
+
std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
return RepeatTemplatedRegisters<FPReg, Reg>(f,
GetFPRegisters(),
@@ -590,21 +677,11 @@ class AssemblerTest : public AssemblerTestBase {
}
std::string base = fmt;
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
+ ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
- // Add a newline at the end.
- str += "\n";
return str;
}
@@ -710,36 +787,36 @@ class AssemblerTest : public AssemblerTestBase {
// Returns a vector of registers used by any of the repeat methods
// involving an "R" (e.g. RepeatR).
- virtual std::vector<Reg*> GetRegisters() = 0;
+ virtual ArrayRef<const Reg> GetRegisters() = 0;
// Returns a vector of fp-registers used by any of the repeat methods
// involving an "F" (e.g. RepeatFF).
- virtual std::vector<FPReg*> GetFPRegisters() {
+ virtual ArrayRef<const FPReg> GetFPRegisters() {
UNIMPLEMENTED(FATAL) << "Architecture does not support floating-point registers";
UNREACHABLE();
}
// Returns a vector of dedicated simd-registers used by any of the repeat
// methods involving an "V" (e.g. RepeatVV).
- virtual std::vector<VecReg*> GetVectorRegisters() {
+ virtual ArrayRef<const VecReg> GetVectorRegisters() {
UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers";
UNREACHABLE();
}
// Secondary register names are the secondary view on registers, e.g., 32b on 64b systems.
- virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ virtual std::string GetSecondaryRegisterName([[maybe_unused]] const Reg& reg) {
UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers";
UNREACHABLE();
}
// Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems.
- virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ virtual std::string GetTertiaryRegisterName([[maybe_unused]] const Reg& reg) {
UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers";
UNREACHABLE();
}
// Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems.
- virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ virtual std::string GetQuaternaryRegisterName([[maybe_unused]] const Reg& reg) {
UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers";
UNREACHABLE();
}
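Switching the register lists from `std::vector<Reg*>` to `ArrayRef<const Reg>` lets per-architecture test fixtures hand out a non-owning view of a static array instead of heap-allocating vectors of pointers. A hedged sketch of what an override might look like (the register names and the enclosing fixture are placeholders, not the real test):

ArrayRef<const XRegister> GetRegisters() override {
  // The array has static storage duration, so the returned view stays valid
  // for the lifetime of the test and no per-call allocation is needed.
  static constexpr XRegister kXRegisters[] = { Zero, RA, SP, GP, TP, T0, T1, T2 };
  return ArrayRef<const XRegister>(kXRegisters);
}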
@@ -818,7 +895,9 @@ class AssemblerTest : public AssemblerTestBase {
const int kMaxBitsExhaustiveTest = 8;
// Create a couple of immediate values up to the number of bits given.
- virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, bool as_uint = false) {
+ virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits,
+ bool as_uint = false,
+ int shift = 0) {
CHECK_GT(imm_bits, 0);
CHECK_LE(imm_bits, 64);
std::vector<int64_t> res;
@@ -826,11 +905,11 @@ class AssemblerTest : public AssemblerTestBase {
if (imm_bits <= kMaxBitsExhaustiveTest) {
if (as_uint) {
for (uint64_t i = MinInt<uint64_t>(imm_bits); i <= MaxInt<uint64_t>(imm_bits); i++) {
- res.push_back(static_cast<int64_t>(i));
+ res.push_back(static_cast<int64_t>(i << shift));
}
} else {
for (int64_t i = MinInt<int64_t>(imm_bits); i <= MaxInt<int64_t>(imm_bits); i++) {
- res.push_back(i);
+ res.push_back(i << shift);
}
}
} else {
@@ -838,14 +917,14 @@ class AssemblerTest : public AssemblerTestBase {
for (uint64_t i = MinInt<uint64_t>(kMaxBitsExhaustiveTest);
i <= MaxInt<uint64_t>(kMaxBitsExhaustiveTest);
i++) {
- res.push_back(static_cast<int64_t>(i));
+ res.push_back(static_cast<int64_t>(i << shift));
}
for (int i = 0; i <= imm_bits; i++) {
uint64_t j = (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1) +
((MaxInt<uint64_t>(imm_bits) -
(MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1))
* i / imm_bits);
- res.push_back(static_cast<int64_t>(j));
+ res.push_back(static_cast<int64_t>(j << shift));
}
} else {
for (int i = 0; i <= imm_bits; i++) {
@@ -853,18 +932,18 @@ class AssemblerTest : public AssemblerTestBase {
((((MinInt<int64_t>(kMaxBitsExhaustiveTest) - 1) -
MinInt<int64_t>(imm_bits))
* i) / imm_bits);
- res.push_back(static_cast<int64_t>(j));
+ res.push_back(static_cast<int64_t>(j << shift));
}
for (int64_t i = MinInt<int64_t>(kMaxBitsExhaustiveTest);
i <= MaxInt<int64_t>(kMaxBitsExhaustiveTest);
i++) {
- res.push_back(static_cast<int64_t>(i));
+ res.push_back(static_cast<int64_t>(i << shift));
}
for (int i = 0; i <= imm_bits; i++) {
int64_t j = (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1) +
((MaxInt<int64_t>(imm_bits) - (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1))
* i / imm_bits);
- res.push_back(static_cast<int64_t>(j));
+ res.push_back(static_cast<int64_t>(j << shift));
}
}
}
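The new `shift` parameter scales every generated immediate by 2^shift, which matches fields that encode an offset in units larger than one (for example, scaled load/store or branch offsets). A worked example, assuming the exhaustive path (`imm_bits <= kMaxBitsExhaustiveTest`):

// imm_bits = 3 (signed), shift = 2: the loop runs i over
// MinInt<int64_t>(3) .. MaxInt<int64_t>(3), i.e. -4 .. 3, and pushes i << 2,
// so the generated immediates are
//   -16, -12, -8, -4, 0, 4, 8, 12
// that is, every multiple of 4 whose unscaled value fits in a signed 3-bit field.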
@@ -1111,19 +1190,11 @@ class AssemblerTest : public AssemblerTestBase {
}
std::string base = fmt;
- std::string addr_string = (this->*GetAName)(addr);
- size_t addr_index;
- if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) {
- base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string);
- }
+ ReplaceAddr((this->*GetAName)(addr), &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
- // Add a newline at the end.
- str += "\n";
return str;
}
@@ -1144,34 +1215,19 @@ class AssemblerTest : public AssemblerTestBase {
}
std::string base = fmt;
- std::string addr_string = (this->*GetAName)(addr);
- size_t addr_index;
- if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) {
- base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string);
- }
-
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
+ ReplaceAddr((this->*GetAName)(addr), &base);
+ ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename RegType, typename AddrType>
std::string RepeatTemplatedRegMem(void (Ass::*f)(RegType, const AddrType&),
- const std::vector<RegType*> registers,
+ ArrayRef<const RegType> registers,
const std::vector<AddrType> addresses,
std::string (AssemblerTest::*GetRName)(const RegType&),
std::string (AssemblerTest::*GetAName)(const AddrType&),
@@ -1181,37 +1237,24 @@ class AssemblerTest : public AssemblerTestBase {
for (auto reg : registers) {
for (auto addr : addresses) {
if (f != nullptr) {
- (assembler_.get()->*f)(*reg, addr);
+ (assembler_.get()->*f)(reg, addr);
}
std::string base = fmt;
- std::string reg_string = (this->*GetRName)(*reg);
- size_t reg_index;
- if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
- base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
- }
-
- std::string addr_string = (this->*GetAName)(addr);
- size_t addr_index;
- if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) {
- base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string);
- }
+ ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base);
+ ReplaceAddr((this->*GetAName)(addr), &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename AddrType, typename RegType>
std::string RepeatTemplatedMemReg(void (Ass::*f)(const AddrType&, RegType),
const std::vector<AddrType> addresses,
- const std::vector<RegType*> registers,
+ ArrayRef<const RegType> registers,
std::string (AssemblerTest::*GetAName)(const AddrType&),
std::string (AssemblerTest::*GetRName)(const RegType&),
const std::string& fmt) {
@@ -1220,30 +1263,17 @@ class AssemblerTest : public AssemblerTestBase {
for (auto addr : addresses) {
for (auto reg : registers) {
if (f != nullptr) {
- (assembler_.get()->*f)(addr, *reg);
+ (assembler_.get()->*f)(addr, reg);
}
std::string base = fmt;
- std::string addr_string = (this->*GetAName)(addr);
- size_t addr_index;
- if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) {
- base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string);
- }
+ ReplaceAddr((this->*GetAName)(addr), &base);
+ ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base);
- std::string reg_string = (this->*GetRName)(*reg);
- size_t reg_index;
- if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
- base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
- }
-
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
@@ -1253,36 +1283,28 @@ class AssemblerTest : public AssemblerTestBase {
template <typename RegType>
std::string RepeatTemplatedRegister(void (Ass::*f)(RegType),
- const std::vector<RegType*> registers,
+ ArrayRef<const RegType> registers,
std::string (AssemblerTest::*GetName)(const RegType&),
const std::string& fmt) {
std::string str;
for (auto reg : registers) {
if (f != nullptr) {
- (assembler_.get()->*f)(*reg);
+ (assembler_.get()->*f)(reg);
}
std::string base = fmt;
- std::string reg_string = (this->*GetName)(*reg);
- size_t reg_index;
- if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
- base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
- }
+ ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename Reg1, typename Reg2>
std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2),
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
const std::string& fmt,
@@ -1294,44 +1316,31 @@ class AssemblerTest : public AssemblerTestBase {
for (auto reg2 : reg2_registers) {
// Check if this register pair is on the exception list. If so, skip it.
if (except != nullptr) {
- const auto& pair = std::make_pair(*reg1, *reg2);
+ const auto& pair = std::make_pair(reg1, reg2);
if (std::find(except->begin(), except->end(), pair) != except->end()) {
continue;
}
}
if (f != nullptr) {
- (assembler_.get()->*f)(*reg1, *reg2);
+ (assembler_.get()->*f)(reg1, reg2);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
-
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename Reg1, typename Reg2>
std::string RepeatTemplatedRegistersNoDupes(void (Ass::*f)(Reg1, Reg2),
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
const std::string& fmt) {
@@ -1342,38 +1351,25 @@ class AssemblerTest : public AssemblerTestBase {
for (auto reg2 : reg2_registers) {
if (reg1 == reg2) continue;
if (f != nullptr) {
- (assembler_.get()->*f)(*reg1, *reg2);
+ (assembler_.get()->*f)(reg1, reg2);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
-
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename Reg1, typename Reg2, typename Reg3>
std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3),
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
- const std::vector<Reg3*> reg3_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
+ ArrayRef<const Reg3> reg3_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
std::string (AssemblerTest::*GetName3)(const Reg3&),
@@ -1383,44 +1379,61 @@ class AssemblerTest : public AssemblerTestBase {
for (auto reg2 : reg2_registers) {
for (auto reg3 : reg3_registers) {
if (f != nullptr) {
- (assembler_.get()->*f)(*reg1, *reg2, *reg3);
+ (assembler_.get()->*f)(reg1, reg2, reg3);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base);
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ return str;
+ }
- std::string reg3_string = (this->*GetName3)(*reg3);
- size_t reg3_index;
- while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) {
- base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string);
- }
+ template <typename Reg1, typename Reg2, typename Reg3, typename Reg4>
+ std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3, Reg4),
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
+ ArrayRef<const Reg3> reg3_registers,
+ ArrayRef<const Reg4> reg4_registers,
+ std::string (AssemblerTest::*GetName1)(const Reg1&),
+ std::string (AssemblerTest::*GetName2)(const Reg2&),
+ std::string (AssemblerTest::*GetName3)(const Reg3&),
+ std::string (AssemblerTest::*GetName4)(const Reg4&),
+ const std::string& fmt) {
+ std::string str;
+ for (auto reg1 : reg1_registers) {
+ for (auto reg2 : reg2_registers) {
+ for (auto reg3 : reg3_registers) {
+ for (auto reg4 : reg4_registers) {
+ if (f != nullptr) {
+ (assembler_.get()->*f)(reg1, reg2, reg3, reg4);
+ }
+ std::string base = fmt;
- if (str.size() > 0) {
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base);
+ ReplaceReg(REG4_TOKEN, (this->*GetName4)(reg4), &base);
+
+ str += base;
str += "\n";
}
- str += base;
}
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
template <typename Reg1, typename Reg2>
std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&),
- const std::vector<Reg1*> reg1_registers,
- const std::vector<Reg2*> reg2_registers,
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
size_t imm_bytes,
@@ -1434,39 +1447,19 @@ class AssemblerTest : public AssemblerTestBase {
for (int64_t imm : imms) {
Imm new_imm = CreateImmediate(imm);
if (f != nullptr) {
- (assembler_.get()->*f)(*reg1, *reg2, new_imm);
+ (assembler_.get()->*f)(reg1, reg2, new_imm);
}
std::string base = fmt;
- std::string reg1_string = (this->*GetName1)(*reg1);
- size_t reg1_index;
- while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
- base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
- }
-
- std::string reg2_string = (this->*GetName2)(*reg2);
- size_t reg2_index;
- while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
- base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
- }
-
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base);
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
@@ -1517,11 +1510,41 @@ class AssemblerTest : public AssemblerTestBase {
}
}
+ static void ReplaceReg(const std::string& reg_token,
+ const std::string& replacement,
+ /*inout*/ std::string* str) {
+ size_t reg_index;
+ while ((reg_index = str->find(reg_token)) != std::string::npos) {
+ str->replace(reg_index, reg_token.length(), replacement);
+ }
+ }
+
+ static void ReplaceImm(int64_t imm,
+ int64_t bias,
+ int64_t multiplier,
+ /*inout*/ std::string* str) {
+ size_t imm_index = str->find(IMM_TOKEN);
+ if (imm_index != std::string::npos) {
+ std::ostringstream sreg;
+ sreg << imm * multiplier + bias;
+ std::string imm_string = sreg.str();
+ str->replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+ }
+ }
+
+ static void ReplaceAddr(const std::string& replacement, /*inout*/ std::string* str) {
+ size_t addr_index;
+ if ((addr_index = str->find(ADDRESS_TOKEN)) != std::string::npos) {
+ str->replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), replacement);
+ }
+ }
+
static constexpr const char* ADDRESS_TOKEN = "{mem}";
static constexpr const char* REG_TOKEN = "{reg}";
static constexpr const char* REG1_TOKEN = "{reg1}";
static constexpr const char* REG2_TOKEN = "{reg2}";
static constexpr const char* REG3_TOKEN = "{reg3}";
+ static constexpr const char* REG4_TOKEN = "{reg4}";
static constexpr const char* IMM_TOKEN = "{imm}";
private:
@@ -1529,7 +1552,7 @@ class AssemblerTest : public AssemblerTestBase {
std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&),
size_t imm_bytes,
const std::string& fmt) {
- const std::vector<Reg*> registers = GetRegisters();
+ ArrayRef<const Reg> registers = GetRegisters();
std::string str;
std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
@@ -1539,45 +1562,29 @@ class AssemblerTest : public AssemblerTestBase {
for (int64_t imm : imms) {
Imm new_imm = CreateImmediate(imm);
if (f != nullptr) {
- (assembler_.get()->*f)(*reg, new_imm);
+ (assembler_.get()->*f)(reg, new_imm);
}
std::string base = fmt;
- std::string reg_string = GetRegName<kRegView>(*reg);
- size_t reg_index;
- while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
- base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
- }
+ ReplaceReg(REG_TOKEN, GetRegName<kRegView>(reg), &base);
+ ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base);
- size_t imm_index = base.find(IMM_TOKEN);
- if (imm_index != std::string::npos) {
- std::ostringstream sreg;
- sreg << imm;
- std::string imm_string = sreg.str();
- base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
- }
-
- if (str.size() > 0) {
- str += "\n";
- }
str += base;
+ str += "\n";
}
}
- // Add a newline at the end.
- str += "\n";
return str;
}
// Override this to pad the code with NOPs to a certain size if needed.
- virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
- }
+ virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {}
void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
assembler_->FinalizeCode();
size_t cs = assembler_->CodeSize();
std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*data)[0], data->size());
- assembler_->FinalizeInstructions(code);
+ assembler_->CopyInstructions(code);
Pad(*data);
Driver(*data, assembly_text, test_name);
}
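Taken together, each `Repeat*` helper expands its format string once per operand combination, with the new `ReplaceReg`/`ReplaceImm`/`ReplaceAddr` helpers doing the token substitution. An illustrative expansion plus a hedged usage sketch (the mnemonic, registers, and test name are examples, not taken from this change):

// The format "addi {reg1}, {reg2}, {imm}" with reg1 = a0, reg2 = a1, imm = -5
// expands to the line "addi a0, a1, -5\n"; the helper concatenates one such
// line per (reg1, reg2, imm) combination, and DriverStr() then compares the
// whole text against the platform assembler/disassembler output.
DriverStr(RepeatRRIb(&Riscv64Assembler::Addi, /*imm_bits=*/ -12, "addi {reg1}, {reg2}, {imm}"),
          "Addi");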
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 73f3657413..6f836d3718 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -141,6 +141,16 @@ class AssemblerTestBase : public testing::Test {
virtual std::vector<std::string> GetAssemblerCommand() {
InstructionSet isa = GetIsa();
switch (isa) {
+ case InstructionSet::kRiscv64:
+ // TODO(riscv64): Support compression (RV64C) in assembler and tests (add `c` to `-march=`).
+ return {FindTool("clang"),
+ "--compile",
+ "-target",
+ "riscv64-linux-gnu",
+ "-march=rv64imafd_zba_zbb",
+ // Force the assembler to fully emit branch instructions instead of leaving
+ // offsets unresolved with relocation information for the linker.
+ "-mno-relax"};
case InstructionSet::kX86:
return {FindTool("clang"), "--compile", "-target", "i386-linux-gnu"};
case InstructionSet::kX86_64:
@@ -159,6 +169,15 @@ class AssemblerTestBase : public testing::Test {
"--no-print-imm-hex",
"--triple",
"thumbv7a-linux-gnueabi"};
+ case InstructionSet::kRiscv64:
+ return {FindTool("llvm-objdump"),
+ "--disassemble",
+ "--no-print-imm-hex",
+ "--no-show-raw-insn",
+ // Disassemble Standard Extensions supported by the assembler.
+ "--mattr=+F,+D,+A,+Zba,+Zbb",
+ "-M",
+ "no-aliases"};
default:
return {
FindTool("llvm-objdump"), "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn"};
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 672cd3d10f..53cb3d6f8e 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -79,7 +79,7 @@ class ArmVIXLAssemblerTest : public AssemblerTestBase {
size_t cs = __ CodeSize();
std::vector<uint8_t> managed_code(cs);
MemoryRegion code(&managed_code[0], managed_code.size());
- __ FinalizeInstructions(code);
+ __ CopyInstructions(code);
DumpAndCheck(managed_code, testname, expected);
}
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
index 8b47b38e63..dc7ec60032 100644
--- a/compiler/utils/jni_macro_assembler.cc
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -25,6 +25,9 @@
#ifdef ART_ENABLE_CODEGEN_arm64
#include "arm64/jni_macro_assembler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "riscv64/jni_macro_assembler_riscv64.h"
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
#include "x86/jni_macro_assembler_x86.h"
#endif
@@ -34,6 +37,7 @@
#include "base/casts.h"
#include "base/globals.h"
#include "base/memory_region.h"
+#include "gc_root.h"
namespace art HIDDEN {
@@ -79,6 +83,10 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
case InstructionSet::kArm64:
return MacroAsm64UniquePtr(new (allocator) arm64::Arm64JNIMacroAssembler(allocator));
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case InstructionSet::kRiscv64:
+ return MacroAsm64UniquePtr(new (allocator) riscv64::Riscv64JNIMacroAssembler(allocator));
+#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case InstructionSet::kX86_64:
return MacroAsm64UniquePtr(new (allocator) x86_64::X86_64JNIMacroAssembler(allocator));
@@ -90,4 +98,21 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
}
}
+template <PointerSize kPointerSize>
+void JNIMacroAssembler<kPointerSize>::LoadGcRootWithoutReadBarrier(ManagedRegister dest,
+ ManagedRegister base,
+ MemberOffset offs) {
+ static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
+ Load(dest, base, offs, sizeof(uint32_t));
+}
+
+template
+void JNIMacroAssembler<PointerSize::k32>::LoadGcRootWithoutReadBarrier(ManagedRegister dest,
+ ManagedRegister base,
+ MemberOffset offs);
+template
+void JNIMacroAssembler<PointerSize::k64>::LoadGcRootWithoutReadBarrier(ManagedRegister dest,
+ ManagedRegister base,
+ MemberOffset offs);
+
} // namespace art
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 0c729705dc..2d51439ee8 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -92,7 +92,7 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
virtual size_t CodeSize() const = 0;
// Copy instructions out of assembly buffer into the given region of memory
- virtual void FinalizeInstructions(const MemoryRegion& region) = 0;
+ virtual void CopyInstructions(const MemoryRegion& region) = 0;
// Emit code that will create an activation on the stack
virtual void BuildFrame(size_t frame_size,
@@ -129,9 +129,14 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
// Load routines
virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0;
-
virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
+ // Load reference from a `GcRoot<>`. The default is to load as `jint`. Some architectures
+ // (say, RISC-V) override this to provide a different sign- or zero-extension.
+ virtual void LoadGcRootWithoutReadBarrier(ManagedRegister dest,
+ ManagedRegister base,
+ MemberOffset offs);
+
// Copying routines
// Move arguments from `srcs` locations to `dests` locations.
@@ -266,8 +271,8 @@ class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> {
return asm_.CodeSize();
}
- void FinalizeInstructions(const MemoryRegion& region) override {
- asm_.FinalizeInstructions(region);
+ void CopyInstructions(const MemoryRegion& region) override {
+ asm_.CopyInstructions(region);
}
DebugFrameOpCodeWriterForAssembler& cfi() override {
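The new `LoadGcRootWithoutReadBarrier()` hook defaults to a plain 32-bit load of the compressed reference (see the .cc change above); an architecture overrides it only when it needs a specific sign- or zero-extension of that 32-bit value. A purely hypothetical override, with all names placeholders and the extension choice left to the architecture's own conventions:

void MyArchJNIMacroAssembler::LoadGcRootWithoutReadBarrier(ManagedRegister dest,
                                                           ManagedRegister base,
                                                           MemberOffset offs) {
  // Hypothetical helper: load exactly 32 bits and extend them the way this
  // architecture expects compressed heap references to sit in a register.
  LoadWordZeroExtended(dest, base, offs.Int32Value());
}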
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
index ac8e7d3010..ff182e6146 100644
--- a/compiler/utils/jni_macro_assembler_test.h
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -77,15 +77,14 @@ class JNIMacroAssemblerTest : public AssemblerTestBase {
private:
// Override this to pad the code with NOPs to a certain size if needed.
- virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
- }
+ virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {}
void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
assembler_->FinalizeCode();
size_t cs = assembler_->CodeSize();
std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*data)[0], data->size());
- assembler_->FinalizeInstructions(code);
+ assembler_->CopyInstructions(code);
Pad(*data);
Driver(*data, assembly_text, test_name);
}
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 0368d90a26..25bf01376b 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -31,6 +31,10 @@ class AssemblerFixup;
namespace arm64 {
class Arm64Assembler;
} // namespace arm64
+namespace riscv64 {
+class Riscv64Assembler;
+class Riscv64Label;
+} // namespace riscv64
namespace x86 {
class X86Assembler;
class NearLabel;
@@ -109,6 +113,8 @@ class Label {
}
friend class arm64::Arm64Assembler;
+ friend class riscv64::Riscv64Assembler;
+ friend class riscv64::Riscv64Label;
friend class x86::X86Assembler;
friend class x86::NearLabel;
friend class x86_64::X86_64Assembler;
diff --git a/compiler/utils/riscv64/assembler_riscv64.cc b/compiler/utils/riscv64/assembler_riscv64.cc
new file mode 100644
index 0000000000..089bc5dfe6
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64.cc
@@ -0,0 +1,2422 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_riscv64.h"
+
+#include "base/bit_utils.h"
+#include "base/casts.h"
+#include "base/logging.h"
+#include "base/memory_region.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+static_assert(static_cast<size_t>(kRiscv64PointerSize) == kRiscv64DoublewordSize,
+ "Unexpected Riscv64 pointer size.");
+static_assert(kRiscv64PointerSize == PointerSize::k64, "Unexpected Riscv64 pointer size.");
+
+// Split 32-bit offset into an `imm20` for LUI/AUIPC and
+// a signed 12-bit short offset for ADDI/JALR/etc.
+ALWAYS_INLINE static inline std::pair<uint32_t, int32_t> SplitOffset(int32_t offset) {
+ // The highest 0x800 values are out of range.
+ DCHECK_LT(offset, 0x7ffff800);
+ // Round `offset` to nearest 4KiB offset because short offset has range [-0x800, 0x800).
+ int32_t near_offset = (offset + 0x800) & ~0xfff;
+ // Calculate the short offset.
+ int32_t short_offset = offset - near_offset;
+ DCHECK(IsInt<12>(short_offset));
+ // Extract the `imm20`.
+ uint32_t imm20 = static_cast<uint32_t>(near_offset) >> 12;
+ // Return the result as a pair.
+ return std::make_pair(imm20, short_offset);
+}
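A worked example of the split, to make the rounding concrete (values only; not part of the change):

// offset = 0x12345800:
//   near_offset  = (0x12345800 + 0x800) & ~0xfff = 0x12346000
//   short_offset = 0x12345800 - 0x12346000       = -0x800
//   imm20        = 0x12346000 >> 12              = 0x12346
// so LUI/AUIPC with imm20 = 0x12346 followed by a 12-bit offset of -0x800
// reconstructs exactly 0x12345800; short_offset always lands in [-0x800, 0x800).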
+
+ALWAYS_INLINE static inline int32_t ToInt12(uint32_t uint12) {
+ DCHECK(IsUint<12>(uint12));
+ return static_cast<int32_t>(uint12 - ((uint12 & 0x800) << 1));
+}
+
+void Riscv64Assembler::FinalizeCode() {
+ CHECK(!finalized_);
+ Assembler::FinalizeCode();
+ ReserveJumpTableSpace();
+ EmitLiterals();
+ PromoteBranches();
+ EmitBranches();
+ EmitJumpTables();
+ PatchCFI();
+ finalized_ = true;
+}
+
+void Riscv64Assembler::Emit(uint32_t value) {
+ if (overwriting_) {
+ // Branches to labels are emitted into their placeholders here.
+ buffer_.Store<uint32_t>(overwrite_location_, value);
+ overwrite_location_ += sizeof(uint32_t);
+ } else {
+ // Other instructions are simply appended at the end here.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ buffer_.Emit<uint32_t>(value);
+ }
+}
+
+/////////////////////////////// RV64 VARIANTS extension ///////////////////////////////
+
+//////////////////////////////// RV64 "I" Instructions ////////////////////////////////
+
+// LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37
+
+void Riscv64Assembler::Lui(XRegister rd, uint32_t imm20) {
+ EmitU(imm20, rd, 0x37);
+}
+
+void Riscv64Assembler::Auipc(XRegister rd, uint32_t imm20) {
+ EmitU(imm20, rd, 0x17);
+}
+
+// Jump instructions (RV32I), opcode = 0x67, 0x6f
+
+void Riscv64Assembler::Jal(XRegister rd, int32_t offset) {
+ EmitJ(offset, rd, 0x6F);
+}
+
+void Riscv64Assembler::Jalr(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x0, rd, 0x67);
+}
+
+// Branch instructions, opcode = 0x63 (subfunc from 0x0 ~ 0x7), 0x67, 0x6f
+
+void Riscv64Assembler::Beq(XRegister rs1, XRegister rs2, int32_t offset) {
+ EmitB(offset, rs2, rs1, 0x0, 0x63);
+}
+
+void Riscv64Assembler::Bne(XRegister rs1, XRegister rs2, int32_t offset) {
+ EmitB(offset, rs2, rs1, 0x1, 0x63);
+}
+
+void Riscv64Assembler::Blt(XRegister rs1, XRegister rs2, int32_t offset) {
+ EmitB(offset, rs2, rs1, 0x4, 0x63);
+}
+
+void Riscv64Assembler::Bge(XRegister rs1, XRegister rs2, int32_t offset) {
+ EmitB(offset, rs2, rs1, 0x5, 0x63);
+}
+
+void Riscv64Assembler::Bltu(XRegister rs1, XRegister rs2, int32_t offset) {
+ EmitB(offset, rs2, rs1, 0x6, 0x63);
+}
+
+void Riscv64Assembler::Bgeu(XRegister rs1, XRegister rs2, int32_t offset) {
+ EmitB(offset, rs2, rs1, 0x7, 0x63);
+}
+
+// Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
+
+void Riscv64Assembler::Lb(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x0, rd, 0x03);
+}
+
+void Riscv64Assembler::Lh(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x1, rd, 0x03);
+}
+
+void Riscv64Assembler::Lw(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x2, rd, 0x03);
+}
+
+void Riscv64Assembler::Ld(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x3, rd, 0x03);
+}
+
+void Riscv64Assembler::Lbu(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x4, rd, 0x03);
+}
+
+void Riscv64Assembler::Lhu(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x5, rd, 0x03);
+}
+
+void Riscv64Assembler::Lwu(XRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x6, rd, 0x3);
+}
+
+// Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
+
+void Riscv64Assembler::Sb(XRegister rs2, XRegister rs1, int32_t offset) {
+ EmitS(offset, rs2, rs1, 0x0, 0x23);
+}
+
+void Riscv64Assembler::Sh(XRegister rs2, XRegister rs1, int32_t offset) {
+ EmitS(offset, rs2, rs1, 0x1, 0x23);
+}
+
+void Riscv64Assembler::Sw(XRegister rs2, XRegister rs1, int32_t offset) {
+ EmitS(offset, rs2, rs1, 0x2, 0x23);
+}
+
+void Riscv64Assembler::Sd(XRegister rs2, XRegister rs1, int32_t offset) {
+ EmitS(offset, rs2, rs1, 0x3, 0x23);
+}
+
+// IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
+
+void Riscv64Assembler::Addi(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x0, rd, 0x13);
+}
+
+void Riscv64Assembler::Slti(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x2, rd, 0x13);
+}
+
+void Riscv64Assembler::Sltiu(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x3, rd, 0x13);
+}
+
+void Riscv64Assembler::Xori(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x4, rd, 0x13);
+}
+
+void Riscv64Assembler::Ori(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x6, rd, 0x13);
+}
+
+void Riscv64Assembler::Andi(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x7, rd, 0x13);
+}
+
+// 0x1 Split: 0x0(6b) + imm12(6b)
+void Riscv64Assembler::Slli(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 64u);
+ EmitI6(0x0, shamt, rs1, 0x1, rd, 0x13);
+}
+
+// 0x5 Split: 0x0(6b) + imm12(6b)
+void Riscv64Assembler::Srli(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 64u);
+ EmitI6(0x0, shamt, rs1, 0x5, rd, 0x13);
+}
+
+// 0x5 Split: 0x10(6b) + imm12(6b)
+void Riscv64Assembler::Srai(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 64u);
+ EmitI6(0x10, shamt, rs1, 0x5, rd, 0x13);
+}
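For reference, one fully worked I-type encoding from this group (the operand choice is arbitrary; the layout is the standard RISC-V I-type: imm[11:0] | rs1 | funct3 | rd | opcode):

// Addi(A0, A1, 0x123) with A0 = x10, A1 = x11:
//   (0x123 << 20) | (11 << 15) | (0x0 << 12) | (10 << 7) | 0x13
//   = 0x12300000  | 0x00058000 | 0x00000000  | 0x00000500 | 0x13
//   = 0x12358513, i.e. "addi a0, a1, 291".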
+
+// ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
+
+void Riscv64Assembler::Add(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x0, rd, 0x33);
+}
+
+void Riscv64Assembler::Sub(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x0, rd, 0x33);
+}
+
+void Riscv64Assembler::Slt(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x02, rd, 0x33);
+}
+
+void Riscv64Assembler::Sltu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x03, rd, 0x33);
+}
+
+void Riscv64Assembler::Xor(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x04, rd, 0x33);
+}
+
+void Riscv64Assembler::Or(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x06, rd, 0x33);
+}
+
+void Riscv64Assembler::And(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x07, rd, 0x33);
+}
+
+void Riscv64Assembler::Sll(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x01, rd, 0x33);
+}
+
+void Riscv64Assembler::Srl(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x05, rd, 0x33);
+}
+
+void Riscv64Assembler::Sra(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x05, rd, 0x33);
+}
+
+// 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
+
+void Riscv64Assembler::Addiw(XRegister rd, XRegister rs1, int32_t imm12) {
+ EmitI(imm12, rs1, 0x0, rd, 0x1b);
+}
+
+void Riscv64Assembler::Slliw(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 32u);
+ EmitR(0x0, shamt, rs1, 0x1, rd, 0x1b);
+}
+
+void Riscv64Assembler::Srliw(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 32u);
+ EmitR(0x0, shamt, rs1, 0x5, rd, 0x1b);
+}
+
+void Riscv64Assembler::Sraiw(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 32u);
+ EmitR(0x20, shamt, rs1, 0x5, rd, 0x1b);
+}
+
+// 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
+
+void Riscv64Assembler::Addw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x0, rd, 0x3b);
+}
+
+void Riscv64Assembler::Subw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x0, rd, 0x3b);
+}
+
+void Riscv64Assembler::Sllw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x1, rd, 0x3b);
+}
+
+void Riscv64Assembler::Srlw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x0, rs2, rs1, 0x5, rd, 0x3b);
+}
+
+void Riscv64Assembler::Sraw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x5, rd, 0x3b);
+}
+
+// Environment call and breakpoint (RV32I), opcode = 0x73
+
+void Riscv64Assembler::Ecall() { EmitI(0x0, 0x0, 0x0, 0x0, 0x73); }
+
+void Riscv64Assembler::Ebreak() { EmitI(0x1, 0x0, 0x0, 0x0, 0x73); }
+
+// Fence instruction (RV32I): opcode = 0xf, funct3 = 0
+
+void Riscv64Assembler::Fence(uint32_t pred, uint32_t succ) {
+ DCHECK(IsUint<4>(pred));
+ DCHECK(IsUint<4>(succ));
+ EmitI(/* normal fence */ 0x0 << 8 | pred << 4 | succ, 0x0, 0x0, 0x0, 0xf);
+}
+
+void Riscv64Assembler::FenceTso() {
+ static constexpr uint32_t kPred = kFenceWrite | kFenceRead;
+ static constexpr uint32_t kSucc = kFenceWrite | kFenceRead;
+ EmitI(ToInt12(/* TSO fence */ 0x8 << 8 | kPred << 4 | kSucc), 0x0, 0x0, 0x0, 0xf);
+}
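A worked example of the fence encoding, assuming the standard RISC-V predecessor/successor bit assignment (W = bit 0, R = bit 1) for the `kFenceWrite`/`kFenceRead` constants:

// Fence(kFenceRead | kFenceWrite, kFenceRead | kFenceWrite), i.e. "fence rw, rw":
//   pred = succ = 0b0011 and fm = 0x0, so imm12 = 0x033 and the instruction
//   word is (0x033 << 20) | 0x0F = 0x0330000f.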
+
+//////////////////////////////// RV64 "I" Instructions END ////////////////////////////////
+
+/////////////////////////// RV64 "Zifencei" Instructions START ////////////////////////////
+
+// "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1
+void Riscv64Assembler::FenceI() { EmitI(0x0, 0x0, 0x1, 0x0, 0xf); }
+
+//////////////////////////// RV64 "Zifencei" Instructions END /////////////////////////////
+
+/////////////////////////////// RV64 "M" Instructions START ///////////////////////////////
+
+// RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7
+
+void Riscv64Assembler::Mul(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x0, rd, 0x33);
+}
+
+void Riscv64Assembler::Mulh(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x1, rd, 0x33);
+}
+
+void Riscv64Assembler::Mulhsu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x2, rd, 0x33);
+}
+
+void Riscv64Assembler::Mulhu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x3, rd, 0x33);
+}
+
+void Riscv64Assembler::Div(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x4, rd, 0x33);
+}
+
+void Riscv64Assembler::Divu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x5, rd, 0x33);
+}
+
+void Riscv64Assembler::Rem(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x6, rd, 0x33);
+}
+
+void Riscv64Assembler::Remu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x7, rd, 0x33);
+}
+
+// RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7
+
+void Riscv64Assembler::Mulw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x0, rd, 0x3b);
+}
+
+void Riscv64Assembler::Divw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x4, rd, 0x3b);
+}
+
+void Riscv64Assembler::Divuw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x5, rd, 0x3b);
+}
+
+void Riscv64Assembler::Remw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x6, rd, 0x3b);
+}
+
+void Riscv64Assembler::Remuw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x1, rs2, rs1, 0x7, rd, 0x3b);
+}
+
+//////////////////////////////// RV64 "M" Instructions END ////////////////////////////////
+
+/////////////////////////////// RV64 "A" Instructions START ///////////////////////////////
+
+void Riscv64Assembler::LrW(XRegister rd, XRegister rs1, AqRl aqrl) {
+ CHECK(aqrl != AqRl::kRelease);
+ EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::LrD(XRegister rd, XRegister rs1, AqRl aqrl) {
+ CHECK(aqrl != AqRl::kRelease);
+ EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ CHECK(aqrl != AqRl::kAcquire);
+ EmitR4(0x3, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ CHECK(aqrl != AqRl::kAcquire);
+ EmitR4(0x3, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
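+
+// LR/SC are normally paired in a retry loop. A sketch of a 32-bit compare-and-swap
+// emitted with these helpers (register and label names are illustrative only):
+//   Riscv64Label retry, done;
+//   Bind(&retry);
+//   LrW(old_value, address, AqRl::kAcquire);
+//   Bne(old_value, expected, &done);
+//   ScW(store_result, new_value, address, AqRl::kRelease);
+//   Bnez(store_result, &retry);  // A non-zero result means the reservation was lost.
+//   Bind(&done);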
+
+void Riscv64Assembler::AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f);
+}
+
+void Riscv64Assembler::AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) {
+ EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f);
+}
+
+/////////////////////////////// RV64 "A" Instructions END ///////////////////////////////
+
+///////////////////////////// RV64 "Zicsr" Instructions START /////////////////////////////
+
+// "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7
+
+void Riscv64Assembler::Csrrw(XRegister rd, uint32_t csr, XRegister rs1) {
+ EmitI(ToInt12(csr), rs1, 0x1, rd, 0x73);
+}
+
+void Riscv64Assembler::Csrrs(XRegister rd, uint32_t csr, XRegister rs1) {
+ EmitI(ToInt12(csr), rs1, 0x2, rd, 0x73);
+}
+
+void Riscv64Assembler::Csrrc(XRegister rd, uint32_t csr, XRegister rs1) {
+ EmitI(ToInt12(csr), rs1, 0x3, rd, 0x73);
+}
+
+void Riscv64Assembler::Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5) {
+ EmitI(ToInt12(csr), uimm5, 0x5, rd, 0x73);
+}
+
+void Riscv64Assembler::Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5) {
+ EmitI(ToInt12(csr), uimm5, 0x6, rd, 0x73);
+}
+
+void Riscv64Assembler::Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5) {
+ EmitI(ToInt12(csr), uimm5, 0x7, rd, 0x73);
+}
+
+////////////////////////////// RV64 "Zicsr" Instructions END //////////////////////////////
+
+/////////////////////////////// RV64 "FD" Instructions START ///////////////////////////////
+
+// FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27
+
+void Riscv64Assembler::FLw(FRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x2, rd, 0x07);
+}
+
+void Riscv64Assembler::FLd(FRegister rd, XRegister rs1, int32_t offset) {
+ EmitI(offset, rs1, 0x3, rd, 0x07);
+}
+
+void Riscv64Assembler::FSw(FRegister rs2, XRegister rs1, int32_t offset) {
+ EmitS(offset, rs2, rs1, 0x2, 0x27);
+}
+
+void Riscv64Assembler::FSd(FRegister rs2, XRegister rs1, int32_t offset) {
+ EmitS(offset, rs2, rs1, 0x3, 0x27);
+}
+
+// FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f
+
+void Riscv64Assembler::FMAddS(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43);
+}
+
+void Riscv64Assembler::FMAddD(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43);
+}
+
+void Riscv64Assembler::FMSubS(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47);
+}
+
+void Riscv64Assembler::FMSubD(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47);
+}
+
+void Riscv64Assembler::FNMSubS(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b);
+}
+
+void Riscv64Assembler::FNMSubD(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b);
+}
+
+void Riscv64Assembler::FNMAddS(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f);
+}
+
+void Riscv64Assembler::FNMAddD(
+ FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) {
+ EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f);
+}
+
+// Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D
+
+void Riscv64Assembler::FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0x4, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0x5, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0x8, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0x9, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0xc, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) {
+ EmitR(0xd, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x2c, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x2d, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FSgnjS(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FSgnjD(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x11, rs2, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x11, rs2, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x2, rd, 0x53);
+}
+
+void Riscv64Assembler::FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x11, rs2, rs1, 0x2, rd, 0x53);
+}
+
+void Riscv64Assembler::FMinS(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x14, rs2, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FMinD(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x15, rs2, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FMaxS(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x14, rs2, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FMaxD(FRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x15, rs2, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x20, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm) {
+ // Note: The `frm` has no effect here; the result can represent every value of the source exactly.
+ EmitR(0x21, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+// FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D
+
+void Riscv64Assembler::FEqS(XRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x50, rs2, rs1, 0x2, rd, 0x53);
+}
+
+void Riscv64Assembler::FEqD(XRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x51, rs2, rs1, 0x2, rd, 0x53);
+}
+
+void Riscv64Assembler::FLtS(XRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x50, rs2, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FLtD(XRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x51, rs2, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FLeS(XRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x50, rs2, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FLeD(XRegister rd, FRegister rs1, FRegister rs2) {
+ EmitR(0x51, rs2, rs1, 0x0, rd, 0x53);
+}
+
+// FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D
+
+void Riscv64Assembler::FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x60, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x61, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x60, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x61, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x60, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x61, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x60, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm) {
+ EmitR(0x61, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ EmitR(0x68, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ // Note: The `frm` has no effect here; the result can represent every value of the source exactly.
+ EmitR(0x69, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ EmitR(0x68, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ // Note: The `frm` has no effect here; the result can represent every value of the source exactly.
+ EmitR(0x69, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ EmitR(0x68, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ EmitR(0x69, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ EmitR(0x68, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+void Riscv64Assembler::FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm) {
+ EmitR(0x69, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53);
+}
+
+// FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
+
+void Riscv64Assembler::FMvXW(XRegister rd, FRegister rs1) {
+ EmitR(0x70, 0x0, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FMvXD(XRegister rd, FRegister rs1) {
+ EmitR(0x71, 0x0, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FMvWX(FRegister rd, XRegister rs1) {
+ EmitR(0x78, 0x0, rs1, 0x0, rd, 0x53);
+}
+
+void Riscv64Assembler::FMvDX(FRegister rd, XRegister rs1) {
+ EmitR(0x79, 0x0, rs1, 0x0, rd, 0x53);
+}
+
+// FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
+
+void Riscv64Assembler::FClassS(XRegister rd, FRegister rs1) {
+ EmitR(0x70, 0x0, rs1, 0x1, rd, 0x53);
+}
+
+void Riscv64Assembler::FClassD(XRegister rd, FRegister rs1) {
+ EmitR(0x71, 0x0, rs1, 0x1, rd, 0x53);
+}
+
+/////////////////////////////// RV64 "FD" Instructions END ///////////////////////////////
+
+////////////////////////////// RV64 "Zba" Instructions START /////////////////////////////
+
+void Riscv64Assembler::AddUw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x4, rs2, rs1, 0x0, rd, 0x3b);
+}
+
+void Riscv64Assembler::Sh1Add(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x2, rd, 0x33);
+}
+
+void Riscv64Assembler::Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x2, rd, 0x3b);
+}
+
+void Riscv64Assembler::Sh2Add(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x4, rd, 0x33);
+}
+
+void Riscv64Assembler::Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x4, rd, 0x3b);
+}
+
+void Riscv64Assembler::Sh3Add(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x6, rd, 0x33);
+}
+
+void Riscv64Assembler::Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x10, rs2, rs1, 0x6, rd, 0x3b);
+}
+
+void Riscv64Assembler::SlliUw(XRegister rd, XRegister rs1, int32_t shamt) {
+ EmitI6(0x2, shamt, rs1, 0x1, rd, 0x1b);
+}
+
+/////////////////////////////// RV64 "Zba" Instructions END //////////////////////////////
+
+////////////////////////////// RV64 "Zbb" Instructions START /////////////////////////////
+
+void Riscv64Assembler::Andn(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x7, rd, 0x33);
+}
+
+void Riscv64Assembler::Orn(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x6, rd, 0x33);
+}
+
+void Riscv64Assembler::Xnor(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x20, rs2, rs1, 0x4, rd, 0x33);
+}
+
+void Riscv64Assembler::Clz(XRegister rd, XRegister rs1) {
+ EmitR(0x30, 0x0, rs1, 0x1, rd, 0x13);
+}
+
+void Riscv64Assembler::Clzw(XRegister rd, XRegister rs1) {
+ EmitR(0x30, 0x0, rs1, 0x1, rd, 0x1b);
+}
+
+void Riscv64Assembler::Ctz(XRegister rd, XRegister rs1) {
+ EmitR(0x30, 0x1, rs1, 0x1, rd, 0x13);
+}
+
+void Riscv64Assembler::Ctzw(XRegister rd, XRegister rs1) {
+ EmitR(0x30, 0x1, rs1, 0x1, rd, 0x1b);
+}
+
+void Riscv64Assembler::Cpop(XRegister rd, XRegister rs1) {
+ EmitR(0x30, 0x2, rs1, 0x1, rd, 0x13);
+}
+
+void Riscv64Assembler::Cpopw(XRegister rd, XRegister rs1) {
+ EmitR(0x30, 0x2, rs1, 0x1, rd, 0x1b);
+}
+
+void Riscv64Assembler::Min(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x5, rs2, rs1, 0x4, rd, 0x33);
+}
+
+void Riscv64Assembler::Minu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x5, rs2, rs1, 0x5, rd, 0x33);
+}
+
+void Riscv64Assembler::Max(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x5, rs2, rs1, 0x6, rd, 0x33);
+}
+
+void Riscv64Assembler::Maxu(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x5, rs2, rs1, 0x7, rd, 0x33);
+}
+
+void Riscv64Assembler::Rol(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x30, rs2, rs1, 0x1, rd, 0x33);
+}
+
+void Riscv64Assembler::Rolw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x30, rs2, rs1, 0x1, rd, 0x3b);
+}
+
+void Riscv64Assembler::Ror(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x30, rs2, rs1, 0x5, rd, 0x33);
+}
+
+void Riscv64Assembler::Rorw(XRegister rd, XRegister rs1, XRegister rs2) {
+ EmitR(0x30, rs2, rs1, 0x5, rd, 0x3b);
+}
+
+void Riscv64Assembler::Rori(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 64u);
+ EmitI6(0x18, shamt, rs1, 0x5, rd, 0x13);
+}
+
+void Riscv64Assembler::Roriw(XRegister rd, XRegister rs1, int32_t shamt) {
+ CHECK_LT(static_cast<uint32_t>(shamt), 32u);
+ EmitI6(0x18, shamt, rs1, 0x5, rd, 0x1b);
+}
+
+void Riscv64Assembler::OrcB(XRegister rd, XRegister rs1) {
+ EmitR(0x14, 0x7, rs1, 0x5, rd, 0x13);
+}
+
+void Riscv64Assembler::Rev8(XRegister rd, XRegister rs1) {
+ EmitR(0x35, 0x18, rs1, 0x5, rd, 0x13);
+}
+
+/////////////////////////////// RV64 "Zbb" Instructions END //////////////////////////////
+
+////////////////////////////// RV64 MACRO Instructions START ///////////////////////////////
+
+// Pseudo instructions
+
+void Riscv64Assembler::Nop() { Addi(Zero, Zero, 0); }
+
+void Riscv64Assembler::Li(XRegister rd, int64_t imm) {
+ LoadImmediate(rd, imm, /*can_use_tmp=*/ false);
+}
+
+void Riscv64Assembler::Mv(XRegister rd, XRegister rs) { Addi(rd, rs, 0); }
+
+void Riscv64Assembler::Not(XRegister rd, XRegister rs) { Xori(rd, rs, -1); }
+
+void Riscv64Assembler::Neg(XRegister rd, XRegister rs) { Sub(rd, Zero, rs); }
+
+void Riscv64Assembler::NegW(XRegister rd, XRegister rs) { Subw(rd, Zero, rs); }
+
+void Riscv64Assembler::SextB(XRegister rd, XRegister rs) {
+ Slli(rd, rs, kXlen - 8u);
+ Srai(rd, rd, kXlen - 8u);
+}
+
+void Riscv64Assembler::SextH(XRegister rd, XRegister rs) {
+ Slli(rd, rs, kXlen - 16u);
+ Srai(rd, rd, kXlen - 16u);
+}
+
+void Riscv64Assembler::SextW(XRegister rd, XRegister rs) { Addiw(rd, rs, 0); }
+
+void Riscv64Assembler::ZextB(XRegister rd, XRegister rs) { Andi(rd, rs, 0xff); }
+
+void Riscv64Assembler::ZextH(XRegister rd, XRegister rs) {
+ Slli(rd, rs, kXlen - 16u);
+ Srli(rd, rd, kXlen - 16u);
+}
+
+void Riscv64Assembler::ZextW(XRegister rd, XRegister rs) {
+ // TODO(riscv64): Use the ZEXT.W alias for ADD.UW from the Zba extension.
+ Slli(rd, rs, kXlen - 32u);
+ Srli(rd, rd, kXlen - 32u);
+}
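+
+// These extension macros expand to a shift pair; for example, with kXlen == 64,
+// `SextB(rd, rs)` emits `slli rd, rs, 56` followed by `srai rd, rd, 56`, and
+// `ZextW(rd, rs)` emits `slli rd, rs, 32` followed by `srli rd, rd, 32`.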
+
+void Riscv64Assembler::Seqz(XRegister rd, XRegister rs) { Sltiu(rd, rs, 1); }
+
+void Riscv64Assembler::Snez(XRegister rd, XRegister rs) { Sltu(rd, Zero, rs); }
+
+void Riscv64Assembler::Sltz(XRegister rd, XRegister rs) { Slt(rd, rs, Zero); }
+
+void Riscv64Assembler::Sgtz(XRegister rd, XRegister rs) { Slt(rd, Zero, rs); }
+
+void Riscv64Assembler::FMvS(FRegister rd, FRegister rs) { FSgnjS(rd, rs, rs); }
+
+void Riscv64Assembler::FAbsS(FRegister rd, FRegister rs) { FSgnjxS(rd, rs, rs); }
+
+void Riscv64Assembler::FNegS(FRegister rd, FRegister rs) { FSgnjnS(rd, rs, rs); }
+
+void Riscv64Assembler::FMvD(FRegister rd, FRegister rs) { FSgnjD(rd, rs, rs); }
+
+void Riscv64Assembler::FAbsD(FRegister rd, FRegister rs) { FSgnjxD(rd, rs, rs); }
+
+void Riscv64Assembler::FNegD(FRegister rd, FRegister rs) { FSgnjnD(rd, rs, rs); }
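+
+// The FP move/abs/neg pseudo-instructions use sign-injection with both source
+// operands equal: FSGNJ copies the sign (move), FSGNJX xors the sign with itself,
+// clearing it (absolute value), and FSGNJN injects the inverted sign (negation).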
+
+void Riscv64Assembler::Beqz(XRegister rs, int32_t offset) {
+ Beq(rs, Zero, offset);
+}
+
+void Riscv64Assembler::Bnez(XRegister rs, int32_t offset) {
+ Bne(rs, Zero, offset);
+}
+
+void Riscv64Assembler::Blez(XRegister rt, int32_t offset) {
+ Bge(Zero, rt, offset);
+}
+
+void Riscv64Assembler::Bgez(XRegister rt, int32_t offset) {
+ Bge(rt, Zero, offset);
+}
+
+void Riscv64Assembler::Bltz(XRegister rt, int32_t offset) {
+ Blt(rt, Zero, offset);
+}
+
+void Riscv64Assembler::Bgtz(XRegister rt, int32_t offset) {
+ Blt(Zero, rt, offset);
+}
+
+void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, int32_t offset) {
+ Blt(rt, rs, offset);
+}
+
+void Riscv64Assembler::Ble(XRegister rs, XRegister rt, int32_t offset) {
+ Bge(rt, rs, offset);
+}
+
+void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, int32_t offset) {
+ Bltu(rt, rs, offset);
+}
+
+void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, int32_t offset) {
+ Bgeu(rt, rs, offset);
+}
+
+void Riscv64Assembler::J(int32_t offset) { Jal(Zero, offset); }
+
+void Riscv64Assembler::Jal(int32_t offset) { Jal(RA, offset); }
+
+void Riscv64Assembler::Jr(XRegister rs) { Jalr(Zero, rs, 0); }
+
+void Riscv64Assembler::Jalr(XRegister rs) { Jalr(RA, rs, 0); }
+
+void Riscv64Assembler::Jalr(XRegister rd, XRegister rs) { Jalr(rd, rs, 0); }
+
+void Riscv64Assembler::Ret() { Jalr(Zero, RA, 0); }
+
+void Riscv64Assembler::RdCycle(XRegister rd) {
+ Csrrs(rd, 0xc00, Zero);
+}
+
+void Riscv64Assembler::RdTime(XRegister rd) {
+ Csrrs(rd, 0xc01, Zero);
+}
+
+void Riscv64Assembler::RdInstret(XRegister rd) {
+ Csrrs(rd, 0xc02, Zero);
+}
+
+void Riscv64Assembler::Csrr(XRegister rd, uint32_t csr) {
+ Csrrs(rd, csr, Zero);
+}
+
+void Riscv64Assembler::Csrw(uint32_t csr, XRegister rs) {
+ Csrrw(Zero, csr, rs);
+}
+
+void Riscv64Assembler::Csrs(uint32_t csr, XRegister rs) {
+ Csrrs(Zero, csr, rs);
+}
+
+void Riscv64Assembler::Csrc(uint32_t csr, XRegister rs) {
+ Csrrc(Zero, csr, rs);
+}
+
+void Riscv64Assembler::Csrwi(uint32_t csr, uint32_t uimm5) {
+ Csrrwi(Zero, csr, uimm5);
+}
+
+void Riscv64Assembler::Csrsi(uint32_t csr, uint32_t uimm5) {
+ Csrrsi(Zero, csr, uimm5);
+}
+
+void Riscv64Assembler::Csrci(uint32_t csr, uint32_t uimm5) {
+ Csrrci(Zero, csr, uimm5);
+}
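+
+// These wrappers follow the standard CSR pseudo-instruction forms: `Csrr` reads a
+// CSR without writing it, `Csrw` writes it without reading the old value (rd is
+// Zero), and `Csrs`/`Csrc` set or clear the CSR bits selected by the mask in `rs`;
+// the `*i` variants take a 5-bit immediate mask instead.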
+
+void Riscv64Assembler::Loadb(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Lb>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Loadh(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Lh>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Loadw(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Lw>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Loadd(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Ld>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Loadbu(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Lbu>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Loadhu(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Lhu>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Loadwu(XRegister rd, XRegister rs1, int32_t offset) {
+ LoadFromOffset<&Riscv64Assembler::Lwu>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::Storeb(XRegister rs2, XRegister rs1, int32_t offset) {
+ StoreToOffset<&Riscv64Assembler::Sb>(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::Storeh(XRegister rs2, XRegister rs1, int32_t offset) {
+ StoreToOffset<&Riscv64Assembler::Sh>(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::Storew(XRegister rs2, XRegister rs1, int32_t offset) {
+ StoreToOffset<&Riscv64Assembler::Sw>(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::Stored(XRegister rs2, XRegister rs1, int32_t offset) {
+ StoreToOffset<&Riscv64Assembler::Sd>(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::FLoadw(FRegister rd, XRegister rs1, int32_t offset) {
+ FLoadFromOffset<&Riscv64Assembler::FLw>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::FLoadd(FRegister rd, XRegister rs1, int32_t offset) {
+ FLoadFromOffset<&Riscv64Assembler::FLd>(rd, rs1, offset);
+}
+
+void Riscv64Assembler::FStorew(FRegister rs2, XRegister rs1, int32_t offset) {
+ FStoreToOffset<&Riscv64Assembler::FSw>(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::FStored(FRegister rs2, XRegister rs1, int32_t offset) {
+ FStoreToOffset<&Riscv64Assembler::FSd>(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::LoadConst32(XRegister rd, int32_t value) {
+ // No need to use a temporary register for 32-bit values.
+ LoadImmediate(rd, value, /*can_use_tmp=*/ false);
+}
+
+void Riscv64Assembler::LoadConst64(XRegister rd, int64_t value) {
+ LoadImmediate(rd, value, /*can_use_tmp=*/ true);
+}
+
+template <typename ValueType, typename Addi, typename AddLarge>
+void AddConstImpl(Riscv64Assembler* assembler,
+ XRegister rd,
+ XRegister rs1,
+ ValueType value,
+ Addi&& addi,
+ AddLarge&& add_large) {
+ ScratchRegisterScope srs(assembler);
+ // A temporary must be available for adjustment even if it's not needed.
+ // However, `rd` can be used as the temporary unless it's the same as `rs1` or SP.
+ DCHECK_IMPLIES(rd == rs1 || rd == SP, srs.AvailableXRegisters() != 0u);
+
+ if (IsInt<12>(value)) {
+ addi(rd, rs1, value);
+ return;
+ }
+
+ constexpr int32_t kPositiveValueSimpleAdjustment = 0x7ff;
+ constexpr int32_t kHighestValueForSimpleAdjustment = 2 * kPositiveValueSimpleAdjustment;
+ constexpr int32_t kNegativeValueSimpleAdjustment = -0x800;
+ constexpr int32_t kLowestValueForSimpleAdjustment = 2 * kNegativeValueSimpleAdjustment;
+
+ if (rd != rs1 && rd != SP) {
+ srs.IncludeXRegister(rd);
+ }
+ XRegister tmp = srs.AllocateXRegister();
+ if (value >= 0 && value <= kHighestValueForSimpleAdjustment) {
+ addi(tmp, rs1, kPositiveValueSimpleAdjustment);
+ addi(rd, tmp, value - kPositiveValueSimpleAdjustment);
+ } else if (value < 0 && value >= kLowestValueForSimpleAdjustment) {
+ addi(tmp, rs1, kNegativeValueSimpleAdjustment);
+ addi(rd, tmp, value - kNegativeValueSimpleAdjustment);
+ } else {
+ add_large(rd, rs1, value, tmp);
+ }
+}
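+
+// Example of the "simple adjustment" path above (illustrative values): for
+// `value == 0x900` (2304), which does not fit in a signed 12-bit immediate, the
+// helper issues `addi(tmp, rs1, 0x7ff)` followed by `addi(rd, tmp, 0x101)`,
+// i.e. two ADDIs instead of materializing the constant in a register first.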
+
+void Riscv64Assembler::AddConst32(XRegister rd, XRegister rs1, int32_t value) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+ auto addiw = [&](XRegister rd, XRegister rs1, int32_t value) { Addiw(rd, rs1, value); };
+ auto add_large = [&](XRegister rd, XRegister rs1, int32_t value, XRegister tmp) {
+ LoadConst32(tmp, value);
+ Addw(rd, rs1, tmp);
+ };
+ AddConstImpl(this, rd, rs1, value, addiw, add_large);
+}
+
+void Riscv64Assembler::AddConst64(XRegister rd, XRegister rs1, int64_t value) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+ auto addi = [&](XRegister rd, XRegister rs1, int32_t value) { Addi(rd, rs1, value); };
+ auto add_large = [&](XRegister rd, XRegister rs1, int64_t value, XRegister tmp) {
+ // We may not have another scratch register for `LoadConst64()`, so use `Li()`.
+ // TODO(riscv64): Refactor `LoadImmediate()` so that we can reuse the code to detect
+ // when the code path using the scratch reg is beneficial, and use that path with a
+ // small modification - instead of adding the two parts together, add them individually
+ // to the input `rs1`. (This works as long as `rd` is not the same as `tmp`.)
+ Li(tmp, value);
+ Add(rd, rs1, tmp);
+ };
+ AddConstImpl(this, rd, rs1, value, addi, add_large);
+}
+
+void Riscv64Assembler::Beqz(XRegister rs, Riscv64Label* label, bool is_bare) {
+ Beq(rs, Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Bnez(XRegister rs, Riscv64Label* label, bool is_bare) {
+ Bne(rs, Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Blez(XRegister rs, Riscv64Label* label, bool is_bare) {
+ Ble(rs, Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Bgez(XRegister rs, Riscv64Label* label, bool is_bare) {
+ Bge(rs, Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Bltz(XRegister rs, Riscv64Label* label, bool is_bare) {
+ Blt(rs, Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Bgtz(XRegister rs, Riscv64Label* label, bool is_bare) {
+ Bgt(rs, Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondEQ, rs, rt);
+}
+
+void Riscv64Assembler::Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondNE, rs, rt);
+}
+
+void Riscv64Assembler::Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondLE, rs, rt);
+}
+
+void Riscv64Assembler::Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondGE, rs, rt);
+}
+
+void Riscv64Assembler::Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondLT, rs, rt);
+}
+
+void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondGT, rs, rt);
+}
+
+void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondLEU, rs, rt);
+}
+
+void Riscv64Assembler::Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondGEU, rs, rt);
+}
+
+void Riscv64Assembler::Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondLTU, rs, rt);
+}
+
+void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) {
+ Bcond(label, is_bare, kCondGTU, rs, rt);
+}
+
+void Riscv64Assembler::Jal(XRegister rd, Riscv64Label* label, bool is_bare) {
+ Buncond(label, rd, is_bare);
+}
+
+void Riscv64Assembler::J(Riscv64Label* label, bool is_bare) {
+ Jal(Zero, label, is_bare);
+}
+
+void Riscv64Assembler::Jal(Riscv64Label* label, bool is_bare) {
+ Jal(RA, label, is_bare);
+}
+
+void Riscv64Assembler::Loadw(XRegister rd, Literal* literal) {
+ DCHECK_EQ(literal->GetSize(), 4u);
+ LoadLiteral(literal, rd, Branch::kLiteral);
+}
+
+void Riscv64Assembler::Loadwu(XRegister rd, Literal* literal) {
+ DCHECK_EQ(literal->GetSize(), 4u);
+ LoadLiteral(literal, rd, Branch::kLiteralUnsigned);
+}
+
+void Riscv64Assembler::Loadd(XRegister rd, Literal* literal) {
+ DCHECK_EQ(literal->GetSize(), 8u);
+ LoadLiteral(literal, rd, Branch::kLiteralLong);
+}
+
+void Riscv64Assembler::FLoadw(FRegister rd, Literal* literal) {
+ DCHECK_EQ(literal->GetSize(), 4u);
+ LoadLiteral(literal, rd, Branch::kLiteralFloat);
+}
+
+void Riscv64Assembler::FLoadd(FRegister rd, Literal* literal) {
+ DCHECK_EQ(literal->GetSize(), 8u);
+ LoadLiteral(literal, rd, Branch::kLiteralDouble);
+}
+
+void Riscv64Assembler::Unimp() {
+ // TODO(riscv64): Use the all-zero 16-bit C.UNIMP encoding once we support compression.
+ Emit(0xC0001073);
+}
+
+/////////////////////////////// RV64 MACRO Instructions END ///////////////////////////////
+
+const Riscv64Assembler::Branch::BranchInfo Riscv64Assembler::Branch::branch_info_[] = {
+ // Short branches (can be promoted to longer).
+ {4, 0, Riscv64Assembler::Branch::kOffset13}, // kCondBranch
+ {4, 0, Riscv64Assembler::Branch::kOffset21}, // kUncondBranch
+ {4, 0, Riscv64Assembler::Branch::kOffset21}, // kCall
+ // Short branches (can't be promoted to longer).
+ {4, 0, Riscv64Assembler::Branch::kOffset13}, // kBareCondBranch
+ {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareUncondBranch
+ {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareCall
+
+ // Medium branch.
+ {8, 4, Riscv64Assembler::Branch::kOffset21}, // kCondBranch21
+
+ // Long branches.
+ {12, 4, Riscv64Assembler::Branch::kOffset32}, // kLongCondBranch
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongUncondBranch
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongCall
+
+ // label.
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLabel
+
+ // literals.
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteral
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralUnsigned
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralLong
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralFloat
+ {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralDouble
+};
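+
+// Each entry is {length in bytes, pc_offset of the offset-producing instruction,
+// offset bit width}. For example, kCondBranch21 is 8 bytes because it is emitted
+// as an inverted conditional branch over a 4-byte JAL, and its pc_offset is 4
+// because the 21-bit offset is taken relative to that JAL.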
+
+void Riscv64Assembler::Branch::InitShortOrLong(Riscv64Assembler::Branch::OffsetBits offset_size,
+ Riscv64Assembler::Branch::Type short_type,
+ Riscv64Assembler::Branch::Type long_type,
+ Riscv64Assembler::Branch::Type longest_type) {
+ Riscv64Assembler::Branch::Type type = short_type;
+ if (offset_size > branch_info_[type].offset_size) {
+ type = long_type;
+ if (offset_size > branch_info_[type].offset_size) {
+ type = longest_type;
+ }
+ }
+ type_ = type;
+}
+
+void Riscv64Assembler::Branch::InitializeType(Type initial_type) {
+ OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_);
+
+ switch (initial_type) {
+ case kCondBranch:
+ if (condition_ != kUncond) {
+ InitShortOrLong(offset_size_needed, kCondBranch, kCondBranch21, kLongCondBranch);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case kUncondBranch:
+ InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch, kLongUncondBranch);
+ break;
+ case kCall:
+ InitShortOrLong(offset_size_needed, kCall, kLongCall, kLongCall);
+ break;
+ case kBareCondBranch:
+ if (condition_ != kUncond) {
+ type_ = kBareCondBranch;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case kBareUncondBranch:
+ type_ = kBareUncondBranch;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ case kBareCall:
+ type_ = kBareCall;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ case kLabel:
+ type_ = initial_type;
+ break;
+ case kLiteral:
+ case kLiteralUnsigned:
+ case kLiteralLong:
+ case kLiteralFloat:
+ case kLiteralDouble:
+ CHECK(!IsResolved());
+ type_ = initial_type;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected branch type " << enum_cast<uint32_t>(initial_type);
+ UNREACHABLE();
+ }
+
+ old_type_ = type_;
+}
+
+bool Riscv64Assembler::Branch::IsNop(BranchCondition condition, XRegister lhs, XRegister rhs) {
+ switch (condition) {
+ case kCondNE:
+ case kCondLT:
+ case kCondGT:
+ case kCondLTU:
+ case kCondGTU:
+ return lhs == rhs;
+ default:
+ return false;
+ }
+}
+
+bool Riscv64Assembler::Branch::IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs) {
+ switch (condition) {
+ case kUncond:
+ return true;
+ case kCondEQ:
+ case kCondGE:
+ case kCondLE:
+ case kCondLEU:
+ case kCondGEU:
+ return lhs == rhs;
+ default:
+ return false;
+ }
+}
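+
+// With identical operands a conditional branch degenerates: e.g. `Blt(reg, reg, ...)`
+// can never be taken (a NOP), while `Bge(reg, reg, ...)` is always taken and is
+// handled as an unconditional branch by `Bcond()` below.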
+
+Riscv64Assembler::Branch::Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(rd),
+ rhs_reg_(Zero),
+ freg_(kNoFRegister),
+ condition_(kUncond) {
+ InitializeType(
+ (rd != Zero ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareUncondBranch : kUncondBranch)));
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location,
+ uint32_t target,
+ Riscv64Assembler::BranchCondition condition,
+ XRegister lhs_reg,
+ XRegister rhs_reg,
+ bool is_bare)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(lhs_reg),
+ rhs_reg_(rhs_reg),
+ freg_(kNoFRegister),
+ condition_(condition) {
+ DCHECK_NE(condition, kUncond);
+ DCHECK(!IsNop(condition, lhs_reg, rhs_reg));
+ DCHECK(!IsUncond(condition, lhs_reg, rhs_reg));
+ InitializeType(is_bare ? kBareCondBranch : kCondBranch);
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location,
+ uint32_t target,
+ XRegister rd,
+ Type label_or_literal_type)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(rd),
+ rhs_reg_(Zero),
+ freg_(kNoFRegister),
+ condition_(kUncond) {
+ CHECK_NE(rd, Zero);
+ InitializeType(label_or_literal_type);
+}
+
+Riscv64Assembler::Branch::Branch(uint32_t location,
+ uint32_t target,
+ FRegister rd,
+ Type literal_type)
+ : old_location_(location),
+ location_(location),
+ target_(target),
+ lhs_reg_(Zero),
+ rhs_reg_(Zero),
+ freg_(rd),
+ condition_(kUncond) {
+ InitializeType(literal_type);
+}
+
+Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::OppositeCondition(
+ Riscv64Assembler::BranchCondition cond) {
+ switch (cond) {
+ case kCondEQ:
+ return kCondNE;
+ case kCondNE:
+ return kCondEQ;
+ case kCondLT:
+ return kCondGE;
+ case kCondGE:
+ return kCondLT;
+ case kCondLE:
+ return kCondGT;
+ case kCondGT:
+ return kCondLE;
+ case kCondLTU:
+ return kCondGEU;
+ case kCondGEU:
+ return kCondLTU;
+ case kCondLEU:
+ return kCondGTU;
+ case kCondGTU:
+ return kCondLEU;
+ case kUncond:
+ LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond);
+ UNREACHABLE();
+ }
+}
+
+Riscv64Assembler::Branch::Type Riscv64Assembler::Branch::GetType() const { return type_; }
+
+Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::GetCondition() const {
+ return condition_;
+}
+
+XRegister Riscv64Assembler::Branch::GetLeftRegister() const { return lhs_reg_; }
+
+XRegister Riscv64Assembler::Branch::GetRightRegister() const { return rhs_reg_; }
+
+FRegister Riscv64Assembler::Branch::GetFRegister() const { return freg_; }
+
+uint32_t Riscv64Assembler::Branch::GetTarget() const { return target_; }
+
+uint32_t Riscv64Assembler::Branch::GetLocation() const { return location_; }
+
+uint32_t Riscv64Assembler::Branch::GetOldLocation() const { return old_location_; }
+
+uint32_t Riscv64Assembler::Branch::GetLength() const { return branch_info_[type_].length; }
+
+uint32_t Riscv64Assembler::Branch::GetOldLength() const { return branch_info_[old_type_].length; }
+
+uint32_t Riscv64Assembler::Branch::GetEndLocation() const { return GetLocation() + GetLength(); }
+
+uint32_t Riscv64Assembler::Branch::GetOldEndLocation() const {
+ return GetOldLocation() + GetOldLength();
+}
+
+bool Riscv64Assembler::Branch::IsBare() const {
+ switch (type_) {
+ case kBareUncondBranch:
+ case kBareCondBranch:
+ case kBareCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Riscv64Assembler::Branch::IsResolved() const { return target_ != kUnresolved; }
+
+Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSize() const {
+ return branch_info_[type_].offset_size;
+}
+
+Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSizeNeeded(
+ uint32_t location, uint32_t target) {
+ // For unresolved targets assume the shortest encoding
+ // (later it will be made longer if needed).
+ if (target == kUnresolved) {
+ return kOffset13;
+ }
+ int64_t distance = static_cast<int64_t>(target) - location;
+ if (IsInt<kOffset13>(distance)) {
+ return kOffset13;
+ } else if (IsInt<kOffset21>(distance)) {
+ return kOffset21;
+ } else {
+ return kOffset32;
+ }
+}
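+
+// These thresholds match the RISC-V encodings: a signed 13-bit offset (+/-4 KiB)
+// fits a conditional branch, a signed 21-bit offset (+/-1 MiB) fits JAL, and
+// anything farther requires the 32-bit AUIPC-based sequences.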
+
+void Riscv64Assembler::Branch::Resolve(uint32_t target) { target_ = target; }
+
+void Riscv64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
+ // All targets should be resolved before we start promoting branches.
+ DCHECK(IsResolved());
+ if (location_ > expand_location) {
+ location_ += delta;
+ }
+ if (target_ > expand_location) {
+ target_ += delta;
+ }
+}
+
+uint32_t Riscv64Assembler::Branch::PromoteIfNeeded() {
+ // All targets should be resolved before we start promoting branches.
+ DCHECK(IsResolved());
+ Type old_type = type_;
+ switch (type_) {
+ // Short branches (can be promoted to longer).
+ case kCondBranch: {
+ OffsetBits needed_size = GetOffsetSizeNeeded(GetOffsetLocation(), target_);
+ if (needed_size <= GetOffsetSize()) {
+ return 0u;
+ }
+ // The offset remains the same for `kCondBranch21` for forward branches.
+ DCHECK_EQ(branch_info_[kCondBranch21].length - branch_info_[kCondBranch21].pc_offset,
+ branch_info_[kCondBranch].length - branch_info_[kCondBranch].pc_offset);
+ if (target_ <= location_) {
+ // Calculate the needed size for kCondBranch21.
+ needed_size =
+ GetOffsetSizeNeeded(location_ + branch_info_[kCondBranch21].pc_offset, target_);
+ }
+ type_ = (needed_size <= branch_info_[kCondBranch21].offset_size)
+ ? kCondBranch21
+ : kLongCondBranch;
+ break;
+ }
+ case kUncondBranch:
+ if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) {
+ return 0u;
+ }
+ type_ = kLongUncondBranch;
+ break;
+ case kCall:
+ if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) {
+ return 0u;
+ }
+ type_ = kLongCall;
+ break;
+ // Medium branch (can be promoted to long).
+ case kCondBranch21:
+ if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) {
+ return 0u;
+ }
+ type_ = kLongCondBranch;
+ break;
+ default:
+ // Other branch types cannot be promoted.
+ DCHECK_LE(GetOffsetSizeNeeded(GetOffsetLocation(), target_), GetOffsetSize()) << type_;
+ return 0u;
+ }
+ DCHECK(type_ != old_type);
+ DCHECK_GT(branch_info_[type_].length, branch_info_[old_type].length);
+ return branch_info_[type_].length - branch_info_[old_type].length;
+}
+
+uint32_t Riscv64Assembler::Branch::GetOffsetLocation() const {
+ return location_ + branch_info_[type_].pc_offset;
+}
+
+int32_t Riscv64Assembler::Branch::GetOffset() const {
+ CHECK(IsResolved());
+ // Calculate the byte distance between instructions and also account for
+ // different PC-relative origins.
+ uint32_t offset_location = GetOffsetLocation();
+ int32_t offset = static_cast<int32_t>(target_ - offset_location);
+ DCHECK_EQ(offset, static_cast<int64_t>(target_) - static_cast<int64_t>(offset_location));
+ return offset;
+}
+
+void Riscv64Assembler::EmitBcond(BranchCondition cond,
+ XRegister rs,
+ XRegister rt,
+ int32_t offset) {
+ switch (cond) {
+#define DEFINE_CASE(COND, cond) \
+ case kCond##COND: \
+ B##cond(rs, rt, offset); \
+ break;
+ DEFINE_CASE(EQ, eq)
+ DEFINE_CASE(NE, ne)
+ DEFINE_CASE(LT, lt)
+ DEFINE_CASE(GE, ge)
+ DEFINE_CASE(LE, le)
+ DEFINE_CASE(GT, gt)
+ DEFINE_CASE(LTU, ltu)
+ DEFINE_CASE(GEU, geu)
+ DEFINE_CASE(LEU, leu)
+ DEFINE_CASE(GTU, gtu)
+#undef DEFINE_CASE
+ case kUncond:
+ LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond);
+ UNREACHABLE();
+ }
+}
+
+void Riscv64Assembler::EmitBranch(Riscv64Assembler::Branch* branch) {
+ CHECK(overwriting_);
+ overwrite_location_ = branch->GetLocation();
+ const int32_t offset = branch->GetOffset();
+ BranchCondition condition = branch->GetCondition();
+ XRegister lhs = branch->GetLeftRegister();
+ XRegister rhs = branch->GetRightRegister();
+
+ auto emit_auipc_and_next = [&](XRegister reg, auto next) {
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ auto [imm20, short_offset] = SplitOffset(offset);
+ Auipc(reg, imm20);
+ next(short_offset);
+ };
+
+ switch (branch->GetType()) {
+ // Short branches.
+ case Branch::kUncondBranch:
+ case Branch::kBareUncondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ J(offset);
+ break;
+ case Branch::kCondBranch:
+ case Branch::kBareCondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ EmitBcond(condition, lhs, rhs, offset);
+ break;
+ case Branch::kCall:
+ case Branch::kBareCall:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ DCHECK(lhs != Zero);
+ Jal(lhs, offset);
+ break;
+
+ // Medium branch.
+ case Branch::kCondBranch21:
+ EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength());
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ J(offset);
+ break;
+
+ // Long branches.
+ case Branch::kLongCondBranch:
+ EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength());
+ FALLTHROUGH_INTENDED;
+ case Branch::kLongUncondBranch:
+ emit_auipc_and_next(TMP, [&](int32_t short_offset) { Jalr(Zero, TMP, short_offset); });
+ break;
+ case Branch::kLongCall:
+ DCHECK(lhs != Zero);
+ emit_auipc_and_next(lhs, [&](int32_t short_offset) { Jalr(lhs, lhs, short_offset); });
+ break;
+
+ // label.
+ case Branch::kLabel:
+ emit_auipc_and_next(lhs, [&](int32_t short_offset) { Addi(lhs, lhs, short_offset); });
+ break;
+ // literals.
+ case Branch::kLiteral:
+ emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lw(lhs, lhs, short_offset); });
+ break;
+ case Branch::kLiteralUnsigned:
+ emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lwu(lhs, lhs, short_offset); });
+ break;
+ case Branch::kLiteralLong:
+ emit_auipc_and_next(lhs, [&](int32_t short_offset) { Ld(lhs, lhs, short_offset); });
+ break;
+ case Branch::kLiteralFloat:
+ emit_auipc_and_next(
+ TMP, [&](int32_t short_offset) { FLw(branch->GetFRegister(), TMP, short_offset); });
+ break;
+ case Branch::kLiteralDouble:
+ emit_auipc_and_next(
+ TMP, [&](int32_t short_offset) { FLd(branch->GetFRegister(), TMP, short_offset); });
+ break;
+ }
+ CHECK_EQ(overwrite_location_, branch->GetEndLocation());
+ CHECK_LE(branch->GetLength(), static_cast<uint32_t>(Branch::kMaxBranchLength));
+}
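+
+// For the AUIPC-based forms, `SplitOffset()` is expected to return an (imm20, imm12)
+// pair such that `(imm20 << 12) + SignExtend(imm12)` reconstructs the original
+// offset, so the AUIPC materializes the high part and the following JALR, ADDI or
+// load supplies the low 12 bits.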
+
+void Riscv64Assembler::EmitBranches() {
+ CHECK(!overwriting_);
+ // Switch from appending instructions at the end of the buffer to overwriting
+ // existing instructions (branch placeholders) in the buffer.
+ overwriting_ = true;
+ for (auto& branch : branches_) {
+ EmitBranch(&branch);
+ }
+ overwriting_ = false;
+}
+
+void Riscv64Assembler::FinalizeLabeledBranch(Riscv64Label* label) {
+ // TODO(riscv64): Support "C" Standard Extension - length may not be a multiple of 4.
+ DCHECK_ALIGNED(branches_.back().GetLength(), sizeof(uint32_t));
+ uint32_t length = branches_.back().GetLength() / sizeof(uint32_t);
+ if (!label->IsBound()) {
+ // Branch forward (to a following label); the distance is unknown.
+ // The first branch forward will contain 0, serving as the terminator of
+ // the list of forward-reaching branches.
+ Emit(label->position_);
+ length--;
+ // Now make the label object point to this branch
+ // (this forms a linked list of branches preceding this label).
+ uint32_t branch_id = branches_.size() - 1;
+ label->LinkTo(branch_id);
+ }
+ // Reserve space for the branch.
+ for (; length != 0u; --length) {
+ Nop();
+ }
+}
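+
+// The NOPs reserved here are placeholders: `EmitBranches()` later switches the
+// buffer into overwrite mode and replaces them (and the stored link word, if any)
+// with the final instructions once all branch targets are known.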
+
+void Riscv64Assembler::Bcond(
+ Riscv64Label* label, bool is_bare, BranchCondition condition, XRegister lhs, XRegister rhs) {
+ // TODO(riscv64): Should an assembler perform these optimizations, or should we remove them?
+ // If lhs = rhs, this can be a NOP.
+ if (Branch::IsNop(condition, lhs, rhs)) {
+ return;
+ }
+ if (Branch::IsUncond(condition, lhs, rhs)) {
+ Buncond(label, Zero, is_bare);
+ return;
+ }
+
+ uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+ branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs, is_bare);
+ FinalizeLabeledBranch(label);
+}
+
+void Riscv64Assembler::Buncond(Riscv64Label* label, XRegister rd, bool is_bare) {
+ uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+ branches_.emplace_back(buffer_.Size(), target, rd, is_bare);
+ FinalizeLabeledBranch(label);
+}
+
+template <typename XRegisterOrFRegister>
+void Riscv64Assembler::LoadLiteral(Literal* literal,
+ XRegisterOrFRegister rd,
+ Branch::Type literal_type) {
+ Riscv64Label* label = literal->GetLabel();
+ DCHECK(!label->IsBound());
+ branches_.emplace_back(buffer_.Size(), Branch::kUnresolved, rd, literal_type);
+ FinalizeLabeledBranch(label);
+}
+
+Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) {
+ CHECK_LT(branch_id, branches_.size());
+ return &branches_[branch_id];
+}
+
+const Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) const {
+ CHECK_LT(branch_id, branches_.size());
+ return &branches_[branch_id];
+}
+
+void Riscv64Assembler::Bind(Riscv64Label* label) {
+ CHECK(!label->IsBound());
+ uint32_t bound_pc = buffer_.Size();
+
+ // Walk the list of branches referring to and preceding this label.
+ // Store the previously unknown target addresses in them.
+ while (label->IsLinked()) {
+ uint32_t branch_id = label->Position();
+ Branch* branch = GetBranch(branch_id);
+ branch->Resolve(bound_pc);
+
+ uint32_t branch_location = branch->GetLocation();
+ // Extract the location of the previous branch in the list (walking the list backwards;
+ // the previous branch ID was stored in the space reserved for this branch).
+ uint32_t prev = buffer_.Load<uint32_t>(branch_location);
+
+ // On to the previous branch in the list...
+ label->position_ = prev;
+ }
+
+ // Now make the label object contain its own location (relative to the end of the preceding
+ // branch, if any; it will be used by the branches referring to and following this label).
+ uint32_t prev_branch_id = Riscv64Label::kNoPrevBranchId;
+ if (!branches_.empty()) {
+ prev_branch_id = branches_.size() - 1u;
+ const Branch* prev_branch = GetBranch(prev_branch_id);
+ bound_pc -= prev_branch->GetEndLocation();
+ }
+ label->prev_branch_id_ = prev_branch_id;
+ label->BindTo(bound_pc);
+}
+
+void Riscv64Assembler::LoadLabelAddress(XRegister rd, Riscv64Label* label) {
+ DCHECK_NE(rd, Zero);
+ uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+ branches_.emplace_back(buffer_.Size(), target, rd, Branch::kLabel);
+ FinalizeLabeledBranch(label);
+}
+
+Literal* Riscv64Assembler::NewLiteral(size_t size, const uint8_t* data) {
+ // We don't support byte and half-word literals.
+ if (size == 4u) {
+ literals_.emplace_back(size, data);
+ return &literals_.back();
+ } else {
+ DCHECK_EQ(size, 8u);
+ long_literals_.emplace_back(size, data);
+ return &long_literals_.back();
+ }
+}
+
+JumpTable* Riscv64Assembler::CreateJumpTable(ArenaVector<Riscv64Label*>&& labels) {
+ jump_tables_.emplace_back(std::move(labels));
+ JumpTable* table = &jump_tables_.back();
+ DCHECK(!table->GetLabel()->IsBound());
+ return table;
+}
+
+uint32_t Riscv64Assembler::GetLabelLocation(const Riscv64Label* label) const {
+ CHECK(label->IsBound());
+ uint32_t target = label->Position();
+ if (label->prev_branch_id_ != Riscv64Label::kNoPrevBranchId) {
+ // Get label location based on the branch preceding it.
+ const Branch* prev_branch = GetBranch(label->prev_branch_id_);
+ target += prev_branch->GetEndLocation();
+ }
+ return target;
+}
+
+uint32_t Riscv64Assembler::GetAdjustedPosition(uint32_t old_position) {
+ // We can reconstruct the adjustment by going through all the branches from the beginning
+ // up to the `old_position`. Since we expect `GetAdjustedPosition()` to be called in a loop
+ // with increasing `old_position`, we can use the data from last `GetAdjustedPosition()` to
+ // continue where we left off and the whole loop should be O(m+n) where m is the number
+ // of positions to adjust and n is the number of branches.
+ if (old_position < last_old_position_) {
+ last_position_adjustment_ = 0;
+ last_old_position_ = 0;
+ last_branch_id_ = 0;
+ }
+ while (last_branch_id_ != branches_.size()) {
+ const Branch* branch = GetBranch(last_branch_id_);
+ if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+ break;
+ }
+ last_position_adjustment_ += branch->GetLength() - branch->GetOldLength();
+ ++last_branch_id_;
+ }
+ last_old_position_ = old_position;
+ return old_position + last_position_adjustment_;
+}
+
+void Riscv64Assembler::ReserveJumpTableSpace() {
+ if (!jump_tables_.empty()) {
+ for (JumpTable& table : jump_tables_) {
+ Riscv64Label* label = table.GetLabel();
+ Bind(label);
+
+ // Bulk ensure capacity, as this may be large.
+ size_t orig_size = buffer_.Size();
+ size_t required_capacity = orig_size + table.GetSize();
+ if (required_capacity > buffer_.Capacity()) {
+ buffer_.ExtendCapacity(required_capacity);
+ }
+#ifndef NDEBUG
+ buffer_.has_ensured_capacity_ = true;
+#endif
+
+ // Fill the space with placeholder data, as the data is not final
+ // until the branches have been promoted, and we shouldn't be
+ // moving uninitialized data during branch promotion.
+ for (size_t cnt = table.GetData().size(), i = 0; i < cnt; ++i) {
+ buffer_.Emit<uint32_t>(0x1abe1234u);
+ }
+
+#ifndef NDEBUG
+ buffer_.has_ensured_capacity_ = false;
+#endif
+ }
+ }
+}
+
+void Riscv64Assembler::PromoteBranches() {
+ // Promote short branches to long as necessary.
+ bool changed;
+ do {
+ changed = false;
+ for (auto& branch : branches_) {
+ CHECK(branch.IsResolved());
+ uint32_t delta = branch.PromoteIfNeeded();
+ // If this branch has been promoted and needs to expand in size,
+ // relocate all branches by the expansion size.
+ if (delta != 0u) {
+ changed = true;
+ uint32_t expand_location = branch.GetLocation();
+ for (auto& branch2 : branches_) {
+ branch2.Relocate(expand_location, delta);
+ }
+ }
+ }
+ } while (changed);
+
+ // Account for branch expansion by resizing the code buffer
+ // and moving the code in it to its final location.
+ size_t branch_count = branches_.size();
+ if (branch_count > 0) {
+ // Resize.
+ Branch& last_branch = branches_[branch_count - 1];
+ uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();
+ uint32_t old_size = buffer_.Size();
+ buffer_.Resize(old_size + size_delta);
+ // Move the code residing between branch placeholders.
+ uint32_t end = old_size;
+ for (size_t i = branch_count; i > 0;) {
+ Branch& branch = branches_[--i];
+ uint32_t size = end - branch.GetOldEndLocation();
+ buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+ end = branch.GetOldLocation();
+ }
+ }
+
+ // Align 64-bit literals by moving them up by 4 bytes if needed.
+ // This can increase the PC-relative distance but all literals are accessed with AUIPC+Load(imm12)
+ // without branch promotion, so this late adjustment cannot take them out of instruction range.
+ if (!long_literals_.empty()) {
+ uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel());
+ size_t lit_size = long_literals_.size() * sizeof(uint64_t);
+ size_t buf_size = buffer_.Size();
+ // 64-bit literals must be at the very end of the buffer.
+ CHECK_EQ(first_literal_location + lit_size, buf_size);
+ if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) {
+ // Insert the padding.
+ buffer_.Resize(buf_size + sizeof(uint32_t));
+ buffer_.Move(first_literal_location + sizeof(uint32_t), first_literal_location, lit_size);
+ DCHECK(!overwriting_);
+ overwriting_ = true;
+ overwrite_location_ = first_literal_location;
+ Emit(0); // Illegal instruction.
+ overwriting_ = false;
+ // Increase target addresses in literal and address loads by 4 bytes in order for correct
+ // offsets from PC to be generated.
+ for (auto& branch : branches_) {
+ uint32_t target = branch.GetTarget();
+ if (target >= first_literal_location) {
+ branch.Resolve(target + sizeof(uint32_t));
+ }
+ }
+ // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal,
+ // we need to adjust the location of the literal's label as well.
+ for (Literal& literal : long_literals_) {
+ // Bound label's position is negative, hence decrementing it instead of incrementing.
+ literal.GetLabel()->position_ -= sizeof(uint32_t);
+ }
+ }
+ }
+}
+
+void Riscv64Assembler::PatchCFI() {
+ if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+ return;
+ }
+
+ using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC;
+ const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+ const std::vector<uint8_t>& old_stream = data.first;
+ const std::vector<DelayedAdvancePC>& advances = data.second;
+
+ // Refill our data buffer with patched opcodes.
+ static constexpr size_t kExtraSpace = 16; // Not every PC advance can be encoded in one byte.
+ cfi().ReserveCFIStream(old_stream.size() + advances.size() + kExtraSpace);
+ size_t stream_pos = 0;
+ for (const DelayedAdvancePC& advance : advances) {
+ DCHECK_GE(advance.stream_pos, stream_pos);
+ // Copy old data up to the point where advance was issued.
+ cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+ stream_pos = advance.stream_pos;
+ // Insert the advance command with its final offset.
+ size_t final_pc = GetAdjustedPosition(advance.pc);
+ cfi().AdvancePC(final_pc);
+ }
+ // Copy the final segment if any.
+ cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
+
+void Riscv64Assembler::EmitJumpTables() {
+ if (!jump_tables_.empty()) {
+ CHECK(!overwriting_);
+ // Switch from appending instructions at the end of the buffer to overwriting
+ // existing instructions (here, jump tables) in the buffer.
+ overwriting_ = true;
+
+ for (JumpTable& table : jump_tables_) {
+ Riscv64Label* table_label = table.GetLabel();
+ uint32_t start = GetLabelLocation(table_label);
+ overwrite_location_ = start;
+
+ for (Riscv64Label* target : table.GetData()) {
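+ // Each entry was reserved as the placeholder word 0x1abe1234; verify it before overwriting.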
+ CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u);
+ // The table will contain target addresses relative to the table start.
+ uint32_t offset = GetLabelLocation(target) - start;
+ Emit(offset);
+ }
+ }
+
+ overwriting_ = false;
+ }
+}
+
+void Riscv64Assembler::EmitLiterals() {
+ if (!literals_.empty()) {
+ for (Literal& literal : literals_) {
+ Riscv64Label* label = literal.GetLabel();
+ Bind(label);
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ DCHECK_EQ(literal.GetSize(), 4u);
+ for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+ buffer_.Emit<uint8_t>(literal.GetData()[i]);
+ }
+ }
+ }
+ if (!long_literals_.empty()) {
+ // These need to be 8-byte-aligned but we shall add the alignment padding after the branch
+ // promotion, if needed. Since all literals are accessed with AUIPC+Load(imm12) without branch
+ // promotion, this late adjustment cannot take long literals out of instruction range.
+ for (Literal& literal : long_literals_) {
+ Riscv64Label* label = literal.GetLabel();
+ Bind(label);
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ DCHECK_EQ(literal.GetSize(), 8u);
+ for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+ buffer_.Emit<uint8_t>(literal.GetData()[i]);
+ }
+ }
+ }
+}
+
+// This method is used to adjust the base register and offset pair for
+// a load/store when the offset doesn't fit into 12-bit signed integer.
+void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base,
+ int32_t& offset,
+ ScratchRegisterScope& srs) {
+ // A scratch register must be available for adjustment even if it's not needed.
+ CHECK_NE(srs.AvailableXRegisters(), 0u);
+ if (IsInt<12>(offset)) {
+ return;
+ }
+
+ constexpr int32_t kPositiveOffsetMaxSimpleAdjustment = 0x7ff;
+ constexpr int32_t kHighestOffsetForSimpleAdjustment = 2 * kPositiveOffsetMaxSimpleAdjustment;
+ constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned8 =
+ RoundDown(kPositiveOffsetMaxSimpleAdjustment, 8);
+ constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned4 =
+ RoundDown(kPositiveOffsetMaxSimpleAdjustment, 4);
+ constexpr int32_t kNegativeOffsetSimpleAdjustment = -0x800;
+ constexpr int32_t kLowestOffsetForSimpleAdjustment = 2 * kNegativeOffsetSimpleAdjustment;
+
+ XRegister tmp = srs.AllocateXRegister();
+ if (offset >= 0 && offset <= kHighestOffsetForSimpleAdjustment) {
+ // Make the adjustment 8-byte aligned (0x7f8) except for offsets that cannot be reached
+ // with this adjustment, then try 4-byte alignment, then just half of the offset.
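+ // For example, an offset of 0x900 yields `Addi(tmp, base, 0x7f8)` and leaves 0x108 as the
+ // immediate for the actual load/store.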
+ int32_t adjustment = IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned8)
+ ? kPositiveOffsetSimpleAdjustmentAligned8
+ : IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned4)
+ ? kPositiveOffsetSimpleAdjustmentAligned4
+ : offset / 2;
+ DCHECK(IsInt<12>(adjustment));
+ Addi(tmp, base, adjustment);
+ offset -= adjustment;
+ } else if (offset < 0 && offset >= kLowestOffsetForSimpleAdjustment) {
+ Addi(tmp, base, kNegativeOffsetSimpleAdjustment);
+ offset -= kNegativeOffsetSimpleAdjustment;
+ } else if (offset >= 0x7ffff800) {
+ // Support even large offsets outside the range supported by `SplitOffset()`.
+ LoadConst32(tmp, offset);
+ Add(tmp, tmp, base);
+ offset = 0;
+ } else {
+ auto [imm20, short_offset] = SplitOffset(offset);
+ Lui(tmp, imm20);
+ Add(tmp, tmp, base);
+ offset = short_offset;
+ }
+ base = tmp;
+}
+
+template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
+void Riscv64Assembler::LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset) {
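+ // The caller's registers must not be in the available scratch set because
+ // `AdjustBaseAndOffset()` below may allocate a scratch register as the new base.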
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+ ScratchRegisterScope srs(this);
+ // If `rd` differs from `rs1`, allow using it as a temporary if needed.
+ if (rd != rs1) {
+ srs.IncludeXRegister(rd);
+ }
+ AdjustBaseAndOffset(rs1, offset, srs);
+ (this->*insn)(rd, rs1, offset);
+}
+
+template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
+void Riscv64Assembler::StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ CHECK_EQ((1u << rs2) & available_scratch_core_registers_, 0u);
+ ScratchRegisterScope srs(this);
+ AdjustBaseAndOffset(rs1, offset, srs);
+ (this->*insn)(rs2, rs1, offset);
+}
+
+template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
+void Riscv64Assembler::FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ ScratchRegisterScope srs(this);
+ AdjustBaseAndOffset(rs1, offset, srs);
+ (this->*insn)(rd, rs1, offset);
+}
+
+template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
+void Riscv64Assembler::FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset) {
+ CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u);
+ ScratchRegisterScope srs(this);
+ AdjustBaseAndOffset(rs1, offset, srs);
+ (this->*insn)(rs2, rs1, offset);
+}
+
+void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp) {
+ CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u);
+ ScratchRegisterScope srs(this);
+ CHECK_IMPLIES(can_use_tmp, srs.AvailableXRegisters() != 0u);
+
+ // Helper lambdas.
+ auto addi = [&](XRegister rd, XRegister rs, int32_t imm) { Addi(rd, rs, imm); };
+ auto addiw = [&](XRegister rd, XRegister rs, int32_t imm) { Addiw(rd, rs, imm); };
+ auto slli = [&](XRegister rd, XRegister rs, int32_t imm) { Slli(rd, rs, imm); };
+ auto lui = [&](XRegister rd, uint32_t imm20) { Lui(rd, imm20); };
+
+ // Simple LUI+ADDI/W can handle value range [-0x80000800, 0x7fffffff].
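+ // (The lower bound is LUI's minimum of -0x80000000 extended by an ADDI of up to -0x800;
+ // values up to 0x7fffffff are reachable because ADDIW wraps modulo 2^32 before sign-extending.)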
+ auto is_simple_li_value = [](int64_t value) {
+ return value >= INT64_C(-0x80000800) && value <= INT64_C(0x7fffffff);
+ };
+ auto emit_simple_li_helper = [&](XRegister rd,
+ int64_t value,
+ auto&& addi,
+ auto&& addiw,
+ auto&& slli,
+ auto&& lui) {
+ DCHECK(is_simple_li_value(value)) << "0x" << std::hex << value;
+ if (IsInt<12>(value)) {
+ addi(rd, Zero, value);
+ } else if (CTZ(value) < 12 && IsInt(6 + CTZ(value), value)) {
+ // This path yields two 16-bit instructions with the "C" Standard Extension.
+ addi(rd, Zero, value >> CTZ(value));
+ slli(rd, rd, CTZ(value));
+ } else if (value < INT64_C(-0x80000000)) {
+ int32_t small_value = dchecked_integral_cast<int32_t>(value - INT64_C(-0x80000000));
+ DCHECK(IsInt<12>(small_value));
+ DCHECK_LT(small_value, 0);
+ lui(rd, 1u << 19);
+ addi(rd, rd, small_value);
+ } else {
+ DCHECK(IsInt<32>(value));
+ // Note: Similar to `SplitOffset()` but we can target the full 32-bit range with ADDIW.
+ int64_t near_value = (value + 0x800) & ~0xfff;
+ int32_t small_value = value - near_value;
+ DCHECK(IsInt<12>(small_value));
+ uint32_t imm20 = static_cast<uint32_t>(near_value) >> 12;
+ DCHECK_NE(imm20, 0u); // Small values are handled above.
+ lui(rd, imm20);
+ if (small_value != 0) {
+ addiw(rd, rd, small_value);
+ }
+ }
+ };
+ auto emit_simple_li = [&](XRegister rd, int64_t value) {
+ emit_simple_li_helper(rd, value, addi, addiw, slli, lui);
+ };
+ auto count_simple_li_instructions = [&](int64_t value) {
+ size_t num_instructions = 0u;
+ auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; };
+ auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; };
+ emit_simple_li_helper(Zero, value, count_rri, count_rri, count_rri, count_ru);
+ return num_instructions;
+ };
+
+ // If LUI+ADDI/W is not enough, we can generate up to 3 SLLI+ADDI afterwards (up to 8 instructions
+ // total). The ADDI from the first SLLI+ADDI pair can be a no-op.
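+ // For example, 0x123450000678 can be materialized as LUI(0x12345) + SLLI(16) + ADDI(0x678):
+ // the constant is decomposed from the low end into 12-bit chunks and emitted top-down.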
+ auto emit_with_slli_addi_helper = [&](XRegister rd,
+ int64_t value,
+ auto&& addi,
+ auto&& addiw,
+ auto&& slli,
+ auto&& lui) {
+ static constexpr size_t kMaxNumSllAddi = 3u;
+ int32_t addi_values[kMaxNumSllAddi];
+ size_t sll_shamts[kMaxNumSllAddi];
+ size_t num_sll_addi = 0u;
+ while (!is_simple_li_value(value)) {
+ DCHECK_LT(num_sll_addi, kMaxNumSllAddi);
+ // Prepare sign-extended low 12 bits for ADDI.
+ int64_t addi_value = (value & 0xfff) - ((value & 0x800) << 1);
+ DCHECK(IsInt<12>(addi_value));
+ int64_t remaining = value - addi_value;
+ size_t shamt = CTZ(remaining);
+ DCHECK_GE(shamt, 12u);
+ addi_values[num_sll_addi] = addi_value;
+ sll_shamts[num_sll_addi] = shamt;
+ value = remaining >> shamt;
+ ++num_sll_addi;
+ }
+ if (num_sll_addi != 0u && IsInt<20>(value) && !IsInt<12>(value)) {
+ // If `sll_shamts[num_sll_addi - 1u]` was only 12, we would have stopped
+ // the decomposition a step earlier with smaller `num_sll_addi`.
+ DCHECK_GT(sll_shamts[num_sll_addi - 1u], 12u);
+ // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate.
+ sll_shamts[num_sll_addi - 1u] -= 12u;
+ lui(rd, dchecked_integral_cast<uint32_t>(value & 0xfffff));
+ } else {
+ emit_simple_li_helper(rd, value, addi, addiw, slli, lui);
+ }
+ for (size_t i = num_sll_addi; i != 0u; ) {
+ --i;
+ slli(rd, rd, sll_shamts[i]);
+ if (addi_values[i] != 0) {
+ addi(rd, rd, addi_values[i]);
+ }
+ }
+ };
+ auto emit_with_slli_addi = [&](XRegister rd, int64_t value) {
+ emit_with_slli_addi_helper(rd, value, addi, addiw, slli, lui);
+ };
+ auto count_instructions_with_slli_addi = [&](int64_t value) {
+ size_t num_instructions = 0u;
+ auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; };
+ auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; };
+ emit_with_slli_addi_helper(Zero, value, count_rri, count_rri, count_rri, count_ru);
+ return num_instructions;
+ };
+
+ size_t insns_needed = count_instructions_with_slli_addi(imm);
+ size_t trailing_slli_shamt = 0u;
+ if (insns_needed > 2u) {
+ // Sometimes it's better to end with a SLLI even when the above code would end with ADDI.
+ if ((imm & 1) == 0 && (imm & 0xfff) != 0) {
+ int64_t value = imm >> CTZ(imm);
+ size_t new_insns_needed = count_instructions_with_slli_addi(value) + /*SLLI*/ 1u;
+ DCHECK_GT(new_insns_needed, 2u);
+ if (insns_needed > new_insns_needed) {
+ insns_needed = new_insns_needed;
+ trailing_slli_shamt = CTZ(imm);
+ }
+ }
+
+ // Sometimes we can emit a shorter sequence that ends with SRLI.
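+ // For example, 0x00ffffffffffffff can be emitted as ADDI(rd, Zero, -1) + SRLI(rd, rd, 8).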
+ if (imm > 0) {
+ size_t shamt = CLZ(static_cast<uint64_t>(imm));
+ DCHECK_LE(shamt, 32u); // Otherwise we would not get here as `insns_needed` would be <= 2.
+ if (imm == dchecked_integral_cast<int64_t>(MaxInt<uint64_t>(64 - shamt))) {
+ Addi(rd, Zero, -1);
+ Srli(rd, rd, shamt);
+ return;
+ }
+
+ int64_t value = static_cast<int64_t>(static_cast<uint64_t>(imm) << shamt);
+ DCHECK_LT(value, 0);
+ if (is_simple_li_value(value)) {
+ size_t new_insns_needed = count_simple_li_instructions(value) + /*SRLI*/ 1u;
+ // In case of equal number of instructions, clang prefers the sequence without SRLI.
+ if (new_insns_needed < insns_needed) {
+ // If we emit ADDI, we set low bits that shall be shifted out to one in line with clang,
+ // effectively choosing to emit the negative constant closest to zero.
+ int32_t shifted_out = dchecked_integral_cast<int32_t>(MaxInt<uint32_t>(shamt));
+ DCHECK_EQ(value & shifted_out, 0);
+ emit_simple_li(rd, (value & 0xfff) == 0 ? value : value + shifted_out);
+ Srli(rd, rd, shamt);
+ return;
+ }
+ }
+
+ size_t ctz = CTZ(static_cast<uint64_t>(value));
+ if (IsInt(ctz + 20, value)) {
+ size_t new_insns_needed = /*ADDI or LUI*/ 1u + /*SLLI*/ 1u + /*SRLI*/ 1u;
+ if (new_insns_needed < insns_needed) {
+ // Clang prefers ADDI+SLLI+SRLI over LUI+SLLI+SRLI.
+ if (IsInt(ctz + 12, value)) {
+ Addi(rd, Zero, value >> ctz);
+ Slli(rd, rd, ctz);
+ } else {
+ Lui(rd, (static_cast<uint64_t>(value) >> ctz) & 0xfffffu);
+ Slli(rd, rd, ctz - 12);
+ }
+ Srli(rd, rd, shamt);
+ return;
+ }
+ }
+ }
+
+ // If we can use a scratch register, try using it to emit a shorter sequence. Without a
+ // scratch reg, the sequence is up to 8 instructions, with a scratch reg only up to 6.
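+ // The split below materializes the sign-extended low 32 bits in the scratch register and
+ // the remaining high part in `rd`, then combines them with SLLI+ADD.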
+ if (can_use_tmp) {
+ int64_t low = (imm & 0xffffffff) - ((imm & 0x80000000) << 1);
+ int64_t remainder = imm - low;
+ size_t slli_shamt = CTZ(remainder);
+ DCHECK_GE(slli_shamt, 32u);
+ int64_t high = remainder >> slli_shamt;
+ size_t new_insns_needed =
+ ((IsInt<20>(high) || (high & 0xfff) == 0u) ? 1u : 2u) +
+ count_simple_li_instructions(low) +
+ /*SLLI+ADD*/ 2u;
+ if (new_insns_needed < insns_needed) {
+ DCHECK_NE(low & 0xfffff000, 0);
+ XRegister tmp = srs.AllocateXRegister();
+ if (IsInt<20>(high) && !IsInt<12>(high)) {
+ // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate.
+ Lui(rd, static_cast<uint32_t>(high & 0xfffff));
+ slli_shamt -= 12;
+ } else {
+ emit_simple_li(rd, high);
+ }
+ emit_simple_li(tmp, low);
+ Slli(rd, rd, slli_shamt);
+ Add(rd, rd, tmp);
+ return;
+ }
+ }
+ }
+ emit_with_slli_addi(rd, trailing_slli_shamt != 0u ? imm >> trailing_slli_shamt : imm);
+ if (trailing_slli_shamt != 0u) {
+ Slli(rd, rd, trailing_slli_shamt);
+ }
+}
+
+/////////////////////////////// RV64 VARIANTS extension end ////////////
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h
new file mode 100644
index 0000000000..15f2518c87
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64.h
@@ -0,0 +1,1178 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
+#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arch/riscv64/instruction_set_features_riscv64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/globals.h"
+#include "base/macros.h"
+#include "managed_register_riscv64.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+class ScratchRegisterScope;
+
+static constexpr size_t kRiscv64HalfwordSize = 2;
+static constexpr size_t kRiscv64WordSize = 4;
+static constexpr size_t kRiscv64DoublewordSize = 8;
+static constexpr size_t kRiscv64FloatRegSizeInBytes = 8;
+
+enum class FPRoundingMode : uint32_t {
+ kRNE = 0x0, // Round to Nearest, ties to Even
+ kRTZ = 0x1, // Round towards Zero
+ kRDN = 0x2, // Round Down (towards −Infinity)
+ kRUP = 0x3, // Round Up (towards +Infinity)
+ kRMM = 0x4, // Round to Nearest, ties to Max Magnitude
+ kDYN = 0x7, // Dynamic rounding mode
+ kDefault = kDYN,
+ // Some instructions never need to round even though the spec includes the RM field.
+ // To simplify testing, emit the RM as 0 by default for these instructions because that's what
+ // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes.
+ kIgnored = 0
+};
+
+enum class AqRl : uint32_t {
+ kNone = 0x0,
+ kRelease = 0x1,
+ kAcquire = 0x2,
+ kAqRl = kRelease | kAcquire
+};
+
+// Bit flags for the predecessor/successor operand sets of the FENCE instruction.
+enum FenceType {
+ kFenceNone = 0,
+ kFenceWrite = 1,
+ kFenceRead = 2,
+ kFenceOutput = 4,
+ kFenceInput = 8,
+ kFenceDefault = 0xf,
+};
+
+// Used to test the values returned by FClassS/FClassD.
+enum FPClassMaskType {
+ kNegativeInfinity = 0x001,
+ kNegativeNormal = 0x002,
+ kNegativeSubnormal = 0x004,
+ kNegativeZero = 0x008,
+ kPositiveZero = 0x010,
+ kPositiveSubnormal = 0x020,
+ kPositiveNormal = 0x040,
+ kPositiveInfinity = 0x080,
+ kSignalingNaN = 0x100,
+ kQuietNaN = 0x200,
+};
+
+class Riscv64Label : public Label {
+ public:
+ Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {}
+
+ Riscv64Label(Riscv64Label&& src) noexcept
+ // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move
+ : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {}
+
+ private:
+ static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max();
+
+ uint32_t prev_branch_id_; // To get distance from preceding branch, if any.
+
+ friend class Riscv64Assembler;
+ DISALLOW_COPY_AND_ASSIGN(Riscv64Label);
+};
+
+// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
+class Literal {
+ public:
+ static constexpr size_t kMaxSize = 8;
+
+ Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) {
+ DCHECK_LE(size, Literal::kMaxSize);
+ memcpy(data_, data, size);
+ }
+
+ template <typename T>
+ T GetValue() const {
+ DCHECK_EQ(size_, sizeof(T));
+ T value;
+ memcpy(&value, data_, sizeof(T));
+ return value;
+ }
+
+ uint32_t GetSize() const { return size_; }
+
+ const uint8_t* GetData() const { return data_; }
+
+ Riscv64Label* GetLabel() { return &label_; }
+
+ const Riscv64Label* GetLabel() const { return &label_; }
+
+ private:
+ Riscv64Label label_;
+ const uint32_t size_;
+ uint8_t data_[kMaxSize];
+
+ DISALLOW_COPY_AND_ASSIGN(Literal);
+};
+
+// Jump table: table of labels emitted after the code and before the literals. Similar to literals.
+class JumpTable {
+ public:
+ explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {}
+
+ size_t GetSize() const { return labels_.size() * sizeof(int32_t); }
+
+ const ArenaVector<Riscv64Label*>& GetData() const { return labels_; }
+
+ Riscv64Label* GetLabel() { return &label_; }
+
+ const Riscv64Label* GetLabel() const { return &label_; }
+
+ private:
+ Riscv64Label label_;
+ ArenaVector<Riscv64Label*> labels_;
+
+ DISALLOW_COPY_AND_ASSIGN(JumpTable);
+};
+
+class Riscv64Assembler final : public Assembler {
+ public:
+ explicit Riscv64Assembler(ArenaAllocator* allocator,
+ const Riscv64InstructionSetFeatures* instruction_set_features = nullptr)
+ : Assembler(allocator),
+ branches_(allocator->Adapter(kArenaAllocAssembler)),
+ finalized_(false),
+ overwriting_(false),
+ overwrite_location_(0),
+ literals_(allocator->Adapter(kArenaAllocAssembler)),
+ long_literals_(allocator->Adapter(kArenaAllocAssembler)),
+ jump_tables_(allocator->Adapter(kArenaAllocAssembler)),
+ last_position_adjustment_(0),
+ last_old_position_(0),
+ last_branch_id_(0),
+ available_scratch_core_registers_((1u << TMP) | (1u << TMP2)),
+ available_scratch_fp_registers_(1u << FTMP) {
+ UNUSED(instruction_set_features);
+ cfi().DelayEmittingAdvancePCs();
+ }
+
+ virtual ~Riscv64Assembler() {
+ for (auto& branch : branches_) {
+ CHECK(branch.IsResolved());
+ }
+ }
+
+ size_t CodeSize() const override { return Assembler::CodeSize(); }
+ DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
+ // According to "The RISC-V Instruction Set Manual"
+
+ // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37
+ // Note: These take a 20-bit unsigned value to align with the clang assembler for testing,
+ // but the value stored in the register shall actually be sign-extended to 64 bits.
+ void Lui(XRegister rd, uint32_t imm20);
+ void Auipc(XRegister rd, uint32_t imm20);
+
+ // Jump instructions (RV32I), opcode = 0x67, 0x6f
+ void Jal(XRegister rd, int32_t offset);
+ void Jalr(XRegister rd, XRegister rs1, int32_t offset);
+
+ // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7
+ void Beq(XRegister rs1, XRegister rs2, int32_t offset);
+ void Bne(XRegister rs1, XRegister rs2, int32_t offset);
+ void Blt(XRegister rs1, XRegister rs2, int32_t offset);
+ void Bge(XRegister rs1, XRegister rs2, int32_t offset);
+ void Bltu(XRegister rs1, XRegister rs2, int32_t offset);
+ void Bgeu(XRegister rs1, XRegister rs2, int32_t offset);
+
+ // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6
+ void Lb(XRegister rd, XRegister rs1, int32_t offset);
+ void Lh(XRegister rd, XRegister rs1, int32_t offset);
+ void Lw(XRegister rd, XRegister rs1, int32_t offset);
+ void Ld(XRegister rd, XRegister rs1, int32_t offset);
+ void Lbu(XRegister rd, XRegister rs1, int32_t offset);
+ void Lhu(XRegister rd, XRegister rs1, int32_t offset);
+ void Lwu(XRegister rd, XRegister rs1, int32_t offset);
+
+ // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3
+ void Sb(XRegister rs2, XRegister rs1, int32_t offset);
+ void Sh(XRegister rs2, XRegister rs1, int32_t offset);
+ void Sw(XRegister rs2, XRegister rs1, int32_t offset);
+ void Sd(XRegister rs2, XRegister rs1, int32_t offset);
+
+ // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7
+ void Addi(XRegister rd, XRegister rs1, int32_t imm12);
+ void Slti(XRegister rd, XRegister rs1, int32_t imm12);
+ void Sltiu(XRegister rd, XRegister rs1, int32_t imm12);
+ void Xori(XRegister rd, XRegister rs1, int32_t imm12);
+ void Ori(XRegister rd, XRegister rs1, int32_t imm12);
+ void Andi(XRegister rd, XRegister rs1, int32_t imm12);
+ void Slli(XRegister rd, XRegister rs1, int32_t shamt);
+ void Srli(XRegister rd, XRegister rs1, int32_t shamt);
+ void Srai(XRegister rd, XRegister rs1, int32_t shamt);
+
+ // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7
+ void Add(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sub(XRegister rd, XRegister rs1, XRegister rs2);
+ void Slt(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sltu(XRegister rd, XRegister rs1, XRegister rs2);
+ void Xor(XRegister rd, XRegister rs1, XRegister rs2);
+ void Or(XRegister rd, XRegister rs1, XRegister rs2);
+ void And(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sll(XRegister rd, XRegister rs1, XRegister rs2);
+ void Srl(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sra(XRegister rd, XRegister rs1, XRegister rs2);
+
+ // 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5
+ void Addiw(XRegister rd, XRegister rs1, int32_t imm12);
+ void Slliw(XRegister rd, XRegister rs1, int32_t shamt);
+ void Srliw(XRegister rd, XRegister rs1, int32_t shamt);
+ void Sraiw(XRegister rd, XRegister rs1, int32_t shamt);
+
+ // 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7
+ void Addw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Subw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sllw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Srlw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sraw(XRegister rd, XRegister rs1, XRegister rs2);
+
+ // Environment call and breakpoint (RV32I), opcode = 0x73
+ void Ecall();
+ void Ebreak();
+
+ // Fence instruction (RV32I): opcode = 0xf, funct3 = 0
+ void Fence(uint32_t pred = kFenceDefault, uint32_t succ = kFenceDefault);
+ void FenceTso();
+
+ // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1
+ void FenceI();
+
+ // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7
+ void Mul(XRegister rd, XRegister rs1, XRegister rs2);
+ void Mulh(XRegister rd, XRegister rs1, XRegister rs2);
+ void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2);
+ void Mulhu(XRegister rd, XRegister rs1, XRegister rs2);
+ void Div(XRegister rd, XRegister rs1, XRegister rs2);
+ void Divu(XRegister rd, XRegister rs1, XRegister rs2);
+ void Rem(XRegister rd, XRegister rs1, XRegister rs2);
+ void Remu(XRegister rd, XRegister rs1, XRegister rs2);
+
+ // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7
+ void Mulw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Divw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Divuw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Remw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Remuw(XRegister rd, XRegister rs1, XRegister rs2);
+
+ // RV32A/RV64A Standard Extension
+ void LrW(XRegister rd, XRegister rs1, AqRl aqrl);
+ void LrD(XRegister rd, XRegister rs1, AqRl aqrl);
+ void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+ void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl);
+
+ // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7
+ void Csrrw(XRegister rd, uint32_t csr, XRegister rs1);
+ void Csrrs(XRegister rd, uint32_t csr, XRegister rs1);
+ void Csrrc(XRegister rd, uint32_t csr, XRegister rs1);
+ void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5);
+ void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5);
+ void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5);
+
+ // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27
+ void FLw(FRegister rd, XRegister rs1, int32_t offset);
+ void FLd(FRegister rd, XRegister rs1, int32_t offset);
+ void FSw(FRegister rs2, XRegister rs1, int32_t offset);
+ void FSd(FRegister rs2, XRegister rs1, int32_t offset);
+
+ // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f
+ void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+ void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm);
+
+ // FP FMA instruction helpers passing the default rounding mode.
+ void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+ void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) {
+ FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault);
+ }
+
+ // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D
+ void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm);
+ void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2);
+ void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2);
+ void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2);
+ void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2);
+ void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2);
+ void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2);
+ void FMinS(FRegister rd, FRegister rs1, FRegister rs2);
+ void FMinD(FRegister rd, FRegister rs1, FRegister rs2);
+ void FMaxS(FRegister rd, FRegister rs1, FRegister rs2);
+ void FMaxD(FRegister rd, FRegister rs1, FRegister rs2);
+ void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm);
+
+ // Simple FP instruction helpers passing the default rounding mode.
+ void FAddS(FRegister rd, FRegister rs1, FRegister rs2) {
+ FAddS(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FAddD(FRegister rd, FRegister rs1, FRegister rs2) {
+ FAddD(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FSubS(FRegister rd, FRegister rs1, FRegister rs2) {
+ FSubS(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FSubD(FRegister rd, FRegister rs1, FRegister rs2) {
+ FSubD(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FMulS(FRegister rd, FRegister rs1, FRegister rs2) {
+ FMulS(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FMulD(FRegister rd, FRegister rs1, FRegister rs2) {
+ FMulD(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FDivS(FRegister rd, FRegister rs1, FRegister rs2) {
+ FDivS(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FDivD(FRegister rd, FRegister rs1, FRegister rs2) {
+ FDivD(rd, rs1, rs2, FPRoundingMode::kDefault);
+ }
+ void FSqrtS(FRegister rd, FRegister rs1) {
+ FSqrtS(rd, rs1, FPRoundingMode::kDefault);
+ }
+ void FSqrtD(FRegister rd, FRegister rs1) {
+ FSqrtD(rd, rs1, FPRoundingMode::kDefault);
+ }
+ void FCvtSD(FRegister rd, FRegister rs1) {
+ FCvtSD(rd, rs1, FPRoundingMode::kDefault);
+ }
+ void FCvtDS(FRegister rd, FRegister rs1) {
+ FCvtDS(rd, rs1, FPRoundingMode::kIgnored);
+ }
+
+ // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D
+ void FEqS(XRegister rd, FRegister rs1, FRegister rs2);
+ void FEqD(XRegister rd, FRegister rs1, FRegister rs2);
+ void FLtS(XRegister rd, FRegister rs1, FRegister rs2);
+ void FLtD(XRegister rd, FRegister rs1, FRegister rs2);
+ void FLeS(XRegister rd, FRegister rs1, FRegister rs2);
+ void FLeD(XRegister rd, FRegister rs1, FRegister rs2);
+
+ // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D
+ void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm);
+ void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
+ void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm);
+
+ // FP conversion instruction helpers passing the default rounding mode.
+ void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); }
+ void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); }
+ void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); }
+ void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); }
+
+ // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D
+ void FMvXW(XRegister rd, FRegister rs1);
+ void FMvXD(XRegister rd, FRegister rs1);
+ void FMvWX(FRegister rd, XRegister rs1);
+ void FMvDX(FRegister rd, XRegister rs1);
+
+ // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D
+ void FClassS(XRegister rd, FRegister rs1);
+ void FClassD(XRegister rd, FRegister rs1);
+
+ // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies.
+ void AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2);
+ void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2);
+ void SlliUw(XRegister rd, XRegister rs1, int32_t shamt);
+
+ // "Zbb" Standard Extension, opcode = 0x13, 0x1b or 0x33, funct3 and funct7 varies.
+ // Note: We do not support 32-bit sext.b, sext.h and zext.h from the Zbb extension.
+ // (Neither does the clang-r498229 assembler, which we currently test against.)
+ void Andn(XRegister rd, XRegister rs1, XRegister rs2);
+ void Orn(XRegister rd, XRegister rs1, XRegister rs2);
+ void Xnor(XRegister rd, XRegister rs1, XRegister rs2);
+ void Clz(XRegister rd, XRegister rs1);
+ void Clzw(XRegister rd, XRegister rs1);
+ void Ctz(XRegister rd, XRegister rs1);
+ void Ctzw(XRegister rd, XRegister rs1);
+ void Cpop(XRegister rd, XRegister rs1);
+ void Cpopw(XRegister rd, XRegister rs1);
+ void Min(XRegister rd, XRegister rs1, XRegister rs2);
+ void Minu(XRegister rd, XRegister rs1, XRegister rs2);
+ void Max(XRegister rd, XRegister rs1, XRegister rs2);
+ void Maxu(XRegister rd, XRegister rs1, XRegister rs2);
+ void Rol(XRegister rd, XRegister rs1, XRegister rs2);
+ void Rolw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Ror(XRegister rd, XRegister rs1, XRegister rs2);
+ void Rorw(XRegister rd, XRegister rs1, XRegister rs2);
+ void Rori(XRegister rd, XRegister rs1, int32_t shamt);
+ void Roriw(XRegister rd, XRegister rs1, int32_t shamt);
+ void OrcB(XRegister rd, XRegister rs1);
+ void Rev8(XRegister rd, XRegister rs1);
+
+ ////////////////////////////// RV64 MACRO Instructions START ///////////////////////////////
+ // These pseudo instructions are from "RISC-V Assembly Programmer's Manual".
+
+ void Nop();
+ void Li(XRegister rd, int64_t imm);
+ void Mv(XRegister rd, XRegister rs);
+ void Not(XRegister rd, XRegister rs);
+ void Neg(XRegister rd, XRegister rs);
+ void NegW(XRegister rd, XRegister rs);
+ void SextB(XRegister rd, XRegister rs);
+ void SextH(XRegister rd, XRegister rs);
+ void SextW(XRegister rd, XRegister rs);
+ void ZextB(XRegister rd, XRegister rs);
+ void ZextH(XRegister rd, XRegister rs);
+ void ZextW(XRegister rd, XRegister rs);
+ void Seqz(XRegister rd, XRegister rs);
+ void Snez(XRegister rd, XRegister rs);
+ void Sltz(XRegister rd, XRegister rs);
+ void Sgtz(XRegister rd, XRegister rs);
+ void FMvS(FRegister rd, FRegister rs);
+ void FAbsS(FRegister rd, FRegister rs);
+ void FNegS(FRegister rd, FRegister rs);
+ void FMvD(FRegister rd, FRegister rs);
+ void FAbsD(FRegister rd, FRegister rs);
+ void FNegD(FRegister rd, FRegister rs);
+
+ // Branch pseudo instructions
+ void Beqz(XRegister rs, int32_t offset);
+ void Bnez(XRegister rs, int32_t offset);
+ void Blez(XRegister rs, int32_t offset);
+ void Bgez(XRegister rs, int32_t offset);
+ void Bltz(XRegister rs, int32_t offset);
+ void Bgtz(XRegister rs, int32_t offset);
+ void Bgt(XRegister rs, XRegister rt, int32_t offset);
+ void Ble(XRegister rs, XRegister rt, int32_t offset);
+ void Bgtu(XRegister rs, XRegister rt, int32_t offset);
+ void Bleu(XRegister rs, XRegister rt, int32_t offset);
+
+ // Jump pseudo instructions
+ void J(int32_t offset);
+ void Jal(int32_t offset);
+ void Jr(XRegister rs);
+ void Jalr(XRegister rs);
+ void Jalr(XRegister rd, XRegister rs);
+ void Ret();
+
+ // Pseudo instructions for accessing control and status registers
+ void RdCycle(XRegister rd);
+ void RdTime(XRegister rd);
+ void RdInstret(XRegister rd);
+ void Csrr(XRegister rd, uint32_t csr);
+ void Csrw(uint32_t csr, XRegister rs);
+ void Csrs(uint32_t csr, XRegister rs);
+ void Csrc(uint32_t csr, XRegister rs);
+ void Csrwi(uint32_t csr, uint32_t uimm5);
+ void Csrsi(uint32_t csr, uint32_t uimm5);
+ void Csrci(uint32_t csr, uint32_t uimm5);
+
+ // Load/store macros for arbitrary 32-bit offsets.
+ void Loadb(XRegister rd, XRegister rs1, int32_t offset);
+ void Loadh(XRegister rd, XRegister rs1, int32_t offset);
+ void Loadw(XRegister rd, XRegister rs1, int32_t offset);
+ void Loadd(XRegister rd, XRegister rs1, int32_t offset);
+ void Loadbu(XRegister rd, XRegister rs1, int32_t offset);
+ void Loadhu(XRegister rd, XRegister rs1, int32_t offset);
+ void Loadwu(XRegister rd, XRegister rs1, int32_t offset);
+ void Storeb(XRegister rs2, XRegister rs1, int32_t offset);
+ void Storeh(XRegister rs2, XRegister rs1, int32_t offset);
+ void Storew(XRegister rs2, XRegister rs1, int32_t offset);
+ void Stored(XRegister rs2, XRegister rs1, int32_t offset);
+ void FLoadw(FRegister rd, XRegister rs1, int32_t offset);
+ void FLoadd(FRegister rd, XRegister rs1, int32_t offset);
+ void FStorew(FRegister rs2, XRegister rs1, int32_t offset);
+ void FStored(FRegister rs2, XRegister rs1, int32_t offset);
+
+ // Macros for loading constants.
+ void LoadConst32(XRegister rd, int32_t value);
+ void LoadConst64(XRegister rd, int64_t value);
+
+ // Macros for adding constants.
+ void AddConst32(XRegister rd, XRegister rs1, int32_t value);
+ void AddConst64(XRegister rd, XRegister rs1, int64_t value);
+
+ // Jumps and branches to a label.
+ void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false);
+ void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false);
+ void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false);
+ void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false);
+ void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false);
+ void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false);
+ void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false);
+ void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false);
+ void J(Riscv64Label* label, bool is_bare = false);
+ void Jal(Riscv64Label* label, bool is_bare = false);
+
+ // Literal load.
+ void Loadw(XRegister rd, Literal* literal);
+ void Loadwu(XRegister rd, Literal* literal);
+ void Loadd(XRegister rd, Literal* literal);
+ void FLoadw(FRegister rd, Literal* literal);
+ void FLoadd(FRegister rd, Literal* literal);
+
+ // Illegal instruction that triggers SIGILL.
+ void Unimp();
+
+ /////////////////////////////// RV64 MACRO Instructions END ///////////////////////////////
+
+ void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); }
+
+ void Jump([[maybe_unused]] Label* label) override {
+ UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64";
+ }
+
+ void Bind(Riscv64Label* label);
+
+ // Load label address using PC-relative loads.
+ void LoadLabelAddress(XRegister rd, Riscv64Label* label);
+
+ // Create a new literal with a given value.
+ // NOTE: Use `Identity<>` to force the template parameter to be explicitly specified.
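+ // For example: `NewLiteral<int64_t>(value)`; the template argument must be spelled out
+ // explicitly because `Identity<>` prevents deduction from the argument.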
+ template <typename T>
+ Literal* NewLiteral(typename Identity<T>::type value) {
+ static_assert(std::is_integral<T>::value, "T must be an integral type.");
+ return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
+ }
+
+ // Create a new literal with the given data.
+ Literal* NewLiteral(size_t size, const uint8_t* data);
+
+ // Create a jump table for the given labels that will be emitted when finalizing.
+ // When the table is emitted, offsets will be relative to the location of the table.
+ // The table location is determined by the location of its label (the label precedes
+ // the table data) and should be loaded using LoadLabelAddress().
+ JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels);
+
+ public:
+ // Emit slow paths queued during assembly, promote short branches to long if needed,
+ // and emit branches.
+ void FinalizeCode() override;
+
+ // Returns the current location of a label.
+ //
+ // This function must be used instead of `Riscv64Label::GetPosition()`
+ // which returns assembler's internal data instead of an actual location.
+ //
+ // The location can change during branch fixup in `FinalizeCode()`. Before that,
+ // the location is not final and therefore not very useful to external users,
+ // so they should preferably retrieve the location only after `FinalizeCode()`.
+ uint32_t GetLabelLocation(const Riscv64Label* label) const;
+
+ // Get the final position of a label after local fixup based on the old position
+ // recorded before FinalizeCode().
+ uint32_t GetAdjustedPosition(uint32_t old_position);
+
+ private:
+ enum BranchCondition : uint8_t {
+ kCondEQ,
+ kCondNE,
+ kCondLT,
+ kCondGE,
+ kCondLE,
+ kCondGT,
+ kCondLTU,
+ kCondGEU,
+ kCondLEU,
+ kCondGTU,
+ kUncond,
+ };
+
+ // Note that PC-relative literal loads are handled as pseudo branches because they need
+ // to be emitted after branch relocation to use correct offsets.
+ class Branch {
+ public:
+ enum Type : uint8_t {
+ // TODO(riscv64): Support 16-bit instructions ("C" Standard Extension).
+
+ // Short branches (can be promoted to longer).
+ kCondBranch,
+ kUncondBranch,
+ kCall,
+ // Short branches (can't be promoted to longer).
+ // TODO(riscv64): Do we need these (untested) bare branches, or can we remove them?
+ kBareCondBranch,
+ kBareUncondBranch,
+ kBareCall,
+
+ // Medium branch (can be promoted to long).
+ kCondBranch21,
+
+ // Long branches.
+ kLongCondBranch,
+ kLongUncondBranch,
+ kLongCall,
+
+ // Label.
+ kLabel,
+
+ // Literals.
+ kLiteral,
+ kLiteralUnsigned,
+ kLiteralLong,
+ kLiteralFloat,
+ kLiteralDouble,
+ };
+
+ // Bit sizes of offsets defined as enums to minimize chance of typos.
+ enum OffsetBits {
+ kOffset13 = 13,
+ kOffset21 = 21,
+ kOffset32 = 32,
+ };
+
+ static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_
+ static constexpr uint32_t kMaxBranchLength = 12; // In bytes.
+
+ struct BranchInfo {
+ // Branch length in bytes.
+ uint32_t length;
+ // The offset in bytes of the PC used in the (only) PC-relative instruction from
+ // the start of the branch sequence. RISC-V always uses the address of the PC-relative
+ // instruction as the PC, so this is essentially the offset of that instruction.
+ uint32_t pc_offset;
+ // How large (in bits) a PC-relative offset can be for a given type of branch.
+ OffsetBits offset_size;
+ };
+ static const BranchInfo branch_info_[/* Type */];
+
+ // Unconditional branch or call.
+ Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare);
+ // Conditional branch.
+ Branch(uint32_t location,
+ uint32_t target,
+ BranchCondition condition,
+ XRegister lhs_reg,
+ XRegister rhs_reg,
+ bool is_bare);
+ // Label address or literal.
+ Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type);
+ Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type);
+
+ // Some conditional branches with lhs = rhs are effectively NOPs, while some
+ // others are effectively unconditional.
+ static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs);
+ static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs);
+
+ static BranchCondition OppositeCondition(BranchCondition cond);
+
+ Type GetType() const;
+ BranchCondition GetCondition() const;
+ XRegister GetLeftRegister() const;
+ XRegister GetRightRegister() const;
+ FRegister GetFRegister() const;
+ uint32_t GetTarget() const;
+ uint32_t GetLocation() const;
+ uint32_t GetOldLocation() const;
+ uint32_t GetLength() const;
+ uint32_t GetOldLength() const;
+ uint32_t GetEndLocation() const;
+ uint32_t GetOldEndLocation() const;
+ bool IsBare() const;
+ bool IsResolved() const;
+
+ // Returns the bit size of the signed offset that the branch instruction can handle.
+ OffsetBits GetOffsetSize() const;
+
+ // Calculates the distance between two byte locations in the assembler buffer and
+ // returns the number of bits needed to represent the distance as a signed integer.
+ static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
+
+ // Resolve a branch when the target is known.
+ void Resolve(uint32_t target);
+
+ // Relocate a branch by a given delta if needed due to expansion of this or another
+ // branch at a given location by this delta (just changes location_ and target_).
+ void Relocate(uint32_t expand_location, uint32_t delta);
+
+ // If necessary, updates the type by promoting a short branch to a longer branch
+ // based on the branch location and target. Returns the amount (in bytes) by
+ // which the branch size has increased.
+ uint32_t PromoteIfNeeded();
+
+ // Returns the offset into assembler buffer that shall be used as the base PC for
+ // offset calculation. RISC-V always uses the address of the PC-relative instruction
+ // as the PC, so this is essentially the location of that instruction.
+ uint32_t GetOffsetLocation() const;
+
+ // Calculates and returns the offset ready for encoding in the branch instruction(s).
+ int32_t GetOffset() const;
+
+ private:
+ // Completes branch construction by determining and recording its type.
+ void InitializeType(Type initial_type);
+ // Helper for the above.
+ void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type);
+
+ uint32_t old_location_; // Offset into assembler buffer in bytes.
+ uint32_t location_; // Offset into assembler buffer in bytes.
+ uint32_t target_; // Offset into assembler buffer in bytes.
+
+ XRegister lhs_reg_; // Left-hand side register in conditional branches or
+ // destination register in calls or literals.
+ XRegister rhs_reg_; // Right-hand side register in conditional branches.
+ FRegister freg_; // Destination register in FP literals.
+ BranchCondition condition_; // Condition for conditional branches.
+
+ Type type_; // Current type of the branch.
+ Type old_type_; // Initial type of the branch.
+ };
+
+ // Branch and literal fixup.
+
+ void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset);
+ void EmitBranch(Branch* branch);
+ void EmitBranches();
+ void EmitJumpTables();
+ void EmitLiterals();
+
+ void FinalizeLabeledBranch(Riscv64Label* label);
+ void Bcond(Riscv64Label* label,
+ bool is_bare,
+ BranchCondition condition,
+ XRegister lhs,
+ XRegister rhs);
+ void Buncond(Riscv64Label* label, XRegister rd, bool is_bare);
+ template <typename XRegisterOrFRegister>
+ void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type);
+
+ Branch* GetBranch(uint32_t branch_id);
+ const Branch* GetBranch(uint32_t branch_id) const;
+
+ void ReserveJumpTableSpace();
+ void PromoteBranches();
+ void PatchCFI();
+
+ // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
+ void Emit(uint32_t value);
+
+ // Adjust base register and offset if needed for load/store with a large offset.
+ void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs);
+
+ // Helper templates for loads/stores with 32-bit offsets.
+ template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
+ void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset);
+ template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)>
+ void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset);
+ template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
+ void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset);
+ template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)>
+ void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset);
+
+ // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`.
+ void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp);
+
+ // Emit helpers.
+
+ // I-type instruction:
+ //
+ // 31 20 19 15 14 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ]
+ // [ imm11:0 rs1 funct3 rd opcode ]
+ // -----------------------------------------------------------------
+ template <typename Reg1, typename Reg2>
+ void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) {
+ DCHECK(IsInt<12>(imm12)) << imm12;
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
+ DCHECK(IsUint<3>(funct3));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 |
+ funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
+ Emit(encoding);
+ }
+
+ // R-type instruction:
+ //
+ // 31 25 24 20 19 15 14 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
+ // [ funct7 rs2 rs1 funct3 rd opcode ]
+ // -----------------------------------------------------------------
+ template <typename Reg1, typename Reg2, typename Reg3>
+ void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) {
+ DCHECK(IsUint<7>(funct7));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
+ DCHECK(IsUint<3>(funct3));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 |
+ static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
+ static_cast<uint32_t>(rd) << 7 | opcode;
+ Emit(encoding);
+ }
+
+ // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/ F[N]MSUB):
+ //
+ // 31 27 25 24 20 19 15 14 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ]
+ // [ rs3 fmt rs2 rs1 funct3 rd opcode ]
+ // -----------------------------------------------------------------
+ template <typename Reg1, typename Reg2, typename Reg3, typename Reg4>
+ void EmitR4(
+ Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) {
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs3)));
+ DCHECK(IsUint<2>(fmt));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
+ DCHECK(IsUint<3>(funct3));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 |
+ static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
+ static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 |
+ opcode;
+ Emit(encoding);
+ }
+
+ // S-type instruction:
+ //
+ // 31 25 24 20 19 15 14 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ]
+ // [ imm11:5 rs2 rs1 funct3 imm4:0 opcode ]
+ // -----------------------------------------------------------------
+ template <typename Reg1, typename Reg2>
+ void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) {
+ DCHECK(IsInt<12>(imm12)) << imm12;
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
+ DCHECK(IsUint<3>(funct3));
+ DCHECK(IsUint<7>(opcode));
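+ // Scatter the 12-bit immediate: bits 11:5 go to encoding bits 31:25, bits 4:0 to bits 11:7.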
+ uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 |
+ static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
+ static_cast<uint32_t>(funct3) << 12 |
+ (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode;
+ Emit(encoding);
+ }
+
+ // I-type instruction variant for shifts (SLLI / SRLI / SRAI):
+ //
+ // 31 26 25 20 19 15 14 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . ]
+ // [ imm11:6 imm5:0(shamt) rs1 funct3 rd opcode ]
+ // -----------------------------------------------------------------
+ void EmitI6(uint32_t funct6,
+ uint32_t imm6,
+ XRegister rs1,
+ uint32_t funct3,
+ XRegister rd,
+ uint32_t opcode) {
+ DCHECK(IsUint<6>(funct6));
+ DCHECK(IsUint<6>(imm6)) << imm6;
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
+ DCHECK(IsUint<3>(funct3));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 |
+ static_cast<uint32_t>(rs1) << 15 | funct3 << 12 |
+ static_cast<uint32_t>(rd) << 7 | opcode;
+ Emit(encoding);
+ }
+
+ // B-type instruction:
+ //
+ // 31 30 25 24 20 19 15 14 12 11 8 7 6 0
+ // -----------------------------------------------------------------
+ // [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ]
+ // [ imm12 imm11:5 rs2 rs1 funct3 imm4:1 imm11 opcode ]
+ // -----------------------------------------------------------------
+ void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) {
+ DCHECK_ALIGNED(offset, 2);
+ DCHECK(IsInt<13>(offset)) << offset;
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs2)));
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rs1)));
+ DCHECK(IsUint<3>(funct3));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu;
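+ // `imm12` holds offset bits 12:1; scatter them into the B-type fields:
+ // offset bit 12 -> encoding bit 31, bits 10:5 -> 30:25, bits 4:1 -> 11:8, bit 11 -> 7.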
+ uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) |
+ static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 |
+ static_cast<uint32_t>(funct3) << 12 |
+ (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode;
+ Emit(encoding);
+ }
+
+ // U-type instruction:
+ //
+ // 31 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ]
+ // [ imm31:12 rd opcode ]
+ // -----------------------------------------------------------------
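+ // For example, `lui a0, 1` encodes as 0x00001537: imm20 = 1 in bits 31:12,
+ // rd = a0(x10) in bits 11:7 and opcode = 0b0110111.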
+ void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) {
+ CHECK(IsUint<20>(imm20)) << imm20;
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode;
+ Emit(encoding);
+ }
+
+ // J-type instruction:
+ //
+ // 31 30 21 19 12 11 7 6 0
+ // -----------------------------------------------------------------
+ // [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ]
+ // imm20 imm10:1 imm11 imm19:12 rd opcode ]
+ // -----------------------------------------------------------------
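+ // For example, `jal ra, .+8` encodes as 0x008000ef: offset 8 places imm10:1 = 0b0000000100
+ // in bits 30:21 while all other immediate bits are zero, with rd = ra(x1)
+ // and opcode = 0b1101111.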
+ void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) {
+ DCHECK_ALIGNED(offset, 2);
+ CHECK(IsInt<21>(offset)) << offset;
+ DCHECK(IsUint<5>(static_cast<uint32_t>(rd)));
+ DCHECK(IsUint<7>(opcode));
+ uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu;
+ uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 |
+ (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) |
+ static_cast<uint32_t>(rd) << 7 | opcode;
+ Emit(encoding);
+ }
+
+ ArenaVector<Branch> branches_;
+
+ // For checking that we finalize the code only once.
+ bool finalized_;
+
+ // Whether we are appending instructions at the end of the buffer or overwriting existing ones.
+ bool overwriting_;
+ // The current overwrite location.
+ uint32_t overwrite_location_;
+
+ // Use `std::deque<>` for literal labels to allow insertions at the end
+ // without invalidating pointers and references to existing elements.
+ ArenaDeque<Literal> literals_;
+ ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons.
+
+ // Jump table list.
+ ArenaDeque<JumpTable> jump_tables_;
+
+ // Data for `GetAdjustedPosition()`, see the description there.
+ uint32_t last_position_adjustment_;
+ uint32_t last_old_position_;
+ uint32_t last_branch_id_;
+
+ uint32_t available_scratch_core_registers_;
+ uint32_t available_scratch_fp_registers_;
+
+ static constexpr uint32_t kXlen = 64;
+
+ friend class ScratchRegisterScope;
+
+ DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
+};
+
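+// RAII helper for temporarily reserving scratch registers. A typical use (sketch):
+//
+//   ScratchRegisterScope srs(assembler);
+//   XRegister tmp = srs.AllocateXRegister();  // Prefers TMP (T6), then TMP2 (T5).
+//   ...  // Use `tmp`.
+//   // The previous set of available scratch registers is restored when `srs` is destroyed.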
+class ScratchRegisterScope {
+ public:
+ explicit ScratchRegisterScope(Riscv64Assembler* assembler)
+ : assembler_(assembler),
+ old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
+ old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}
+
+ ~ScratchRegisterScope() {
+ assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
+ assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
+ }
+
+ // Allocate a scratch `XRegister`. There must be an available register to allocate.
+ XRegister AllocateXRegister() {
+ CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
+ // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_core_registers_);
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ return enum_cast<XRegister>(reg_num);
+ }
+
+ // Free a previously unavailable core register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
+ void FreeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch core registers.
+ size_t AvailableXRegisters() {
+ return POPCOUNT(assembler_->available_scratch_core_registers_);
+ }
+
+ // Make sure a core register is available for use as a scratch register.
+ void IncludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure a core register is not available for use as a scratch register.
+ void ExcludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ }
+
+ // Allocate a scratch `FRegister`. There must be an available register to allocate.
+ FRegister AllocateFRegister() {
+ CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
+ // Allocate the highest available scratch register (same as for core registers).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_fp_registers_);
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ return enum_cast<FRegister>(reg_num);
+ }
+
+ // Free a previously unavailable FP register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `FTMP`.
+ void FreeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch FP registers.
+ size_t AvailableFRegisters() {
+ return POPCOUNT(assembler_->available_scratch_fp_registers_);
+ }
+
+ // Make sure an FP register is available for use as a scratch register.
+ void IncludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure an FP register is not available for use as a scratch register.
+ void ExcludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ }
+
+ private:
+ Riscv64Assembler* const assembler_;
+ const uint32_t old_available_scratch_core_registers_;
+ const uint32_t old_available_scratch_fp_registers_;
+
+ DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
diff --git a/compiler/utils/riscv64/assembler_riscv64_test.cc b/compiler/utils/riscv64/assembler_riscv64_test.cc
new file mode 100644
index 0000000000..0299ac25c5
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64_test.cc
@@ -0,0 +1,2939 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_riscv64.h"
+
+#include <inttypes.h>
+
+#include <map>
+
+#include "base/bit_utils.h"
+#include "utils/assembler_test.h"
+
+#define __ GetAssembler()->
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+struct RISCV64CpuRegisterCompare {
+ bool operator()(const XRegister& a, const XRegister& b) const { return a < b; }
+};
+
+class AssemblerRISCV64Test : public AssemblerTest<Riscv64Assembler,
+ Riscv64Label,
+ XRegister,
+ FRegister,
+ int32_t> {
+ public:
+ using Base = AssemblerTest<Riscv64Assembler,
+ Riscv64Label,
+ XRegister,
+ FRegister,
+ int32_t>;
+
+ AssemblerRISCV64Test()
+ : instruction_set_features_(Riscv64InstructionSetFeatures::FromVariant("default", nullptr)) {}
+
+ protected:
+ Riscv64Assembler* CreateAssembler(ArenaAllocator* allocator) override {
+ return new (allocator) Riscv64Assembler(allocator, instruction_set_features_.get());
+ }
+
+ InstructionSet GetIsa() override { return InstructionSet::kRiscv64; }
+
+ // Clang's assembler takes advantage of certain extensions for emitting constants with `li`
+ // but our assembler does not. For now, we use a simple `-march` to avoid the divergence.
+ // TODO(riscv64): Implement these more efficient patterns in assembler.
+ void SetUseSimpleMarch(bool value) {
+ use_simple_march_ = value;
+ }
+
+ std::vector<std::string> GetAssemblerCommand() override {
+ std::vector<std::string> result = Base::GetAssemblerCommand();
+ if (use_simple_march_) {
+ auto it = std::find_if(result.begin(),
+ result.end(),
+ [](const std::string& s) { return StartsWith(s, "-march="); });
+ CHECK(it != result.end());
+ *it = "-march=rv64imafd";
+ }
+ return result;
+ }
+
+ std::vector<std::string> GetDisassemblerCommand() override {
+ std::vector<std::string> result = Base::GetDisassemblerCommand();
+ if (use_simple_march_) {
+ auto it = std::find_if(result.begin(),
+ result.end(),
+ [](const std::string& s) { return StartsWith(s, "--mattr="); });
+ CHECK(it != result.end());
+ *it = "--mattr=+F,+D,+A";
+ }
+ return result;
+ }
+
+ void SetUpHelpers() override {
+ if (secondary_register_names_.empty()) {
+ secondary_register_names_.emplace(Zero, "zero");
+ secondary_register_names_.emplace(RA, "ra");
+ secondary_register_names_.emplace(SP, "sp");
+ secondary_register_names_.emplace(GP, "gp");
+ secondary_register_names_.emplace(TP, "tp");
+ secondary_register_names_.emplace(T0, "t0");
+ secondary_register_names_.emplace(T1, "t1");
+ secondary_register_names_.emplace(T2, "t2");
+ secondary_register_names_.emplace(S0, "s0"); // s0/fp
+ secondary_register_names_.emplace(S1, "s1");
+ secondary_register_names_.emplace(A0, "a0");
+ secondary_register_names_.emplace(A1, "a1");
+ secondary_register_names_.emplace(A2, "a2");
+ secondary_register_names_.emplace(A3, "a3");
+ secondary_register_names_.emplace(A4, "a4");
+ secondary_register_names_.emplace(A5, "a5");
+ secondary_register_names_.emplace(A6, "a6");
+ secondary_register_names_.emplace(A7, "a7");
+ secondary_register_names_.emplace(S2, "s2");
+ secondary_register_names_.emplace(S3, "s3");
+ secondary_register_names_.emplace(S4, "s4");
+ secondary_register_names_.emplace(S5, "s5");
+ secondary_register_names_.emplace(S6, "s6");
+ secondary_register_names_.emplace(S7, "s7");
+ secondary_register_names_.emplace(S8, "s8");
+ secondary_register_names_.emplace(S9, "s9");
+ secondary_register_names_.emplace(S10, "s10");
+ secondary_register_names_.emplace(S11, "s11");
+ secondary_register_names_.emplace(T3, "t3");
+ secondary_register_names_.emplace(T4, "t4");
+ secondary_register_names_.emplace(T5, "t5");
+ secondary_register_names_.emplace(T6, "t6");
+ }
+ }
+
+ void TearDown() override {
+ AssemblerTest::TearDown();
+ }
+
+ std::vector<Riscv64Label> GetAddresses() override {
+ UNIMPLEMENTED(FATAL) << "Feature not implemented yet";
+ UNREACHABLE();
+ }
+
+ ArrayRef<const XRegister> GetRegisters() override {
+ static constexpr XRegister kXRegisters[] = {
+ Zero,
+ RA,
+ SP,
+ GP,
+ TP,
+ T0,
+ T1,
+ T2,
+ S0,
+ S1,
+ A0,
+ A1,
+ A2,
+ A3,
+ A4,
+ A5,
+ A6,
+ A7,
+ S2,
+ S3,
+ S4,
+ S5,
+ S6,
+ S7,
+ S8,
+ S9,
+ S10,
+ S11,
+ T3,
+ T4,
+ T5,
+ T6,
+ };
+ return ArrayRef<const XRegister>(kXRegisters);
+ }
+
+ ArrayRef<const FRegister> GetFPRegisters() override {
+ static constexpr FRegister kFRegisters[] = {
+ FT0,
+ FT1,
+ FT2,
+ FT3,
+ FT4,
+ FT5,
+ FT6,
+ FT7,
+ FS0,
+ FS1,
+ FA0,
+ FA1,
+ FA2,
+ FA3,
+ FA4,
+ FA5,
+ FA6,
+ FA7,
+ FS2,
+ FS3,
+ FS4,
+ FS5,
+ FS6,
+ FS7,
+ FS8,
+ FS9,
+ FS10,
+ FS11,
+ FT8,
+ FT9,
+ FT10,
+ FT11,
+ };
+ return ArrayRef<const FRegister>(kFRegisters);
+ }
+
+ std::string GetSecondaryRegisterName(const XRegister& reg) override {
+ CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+ return secondary_register_names_[reg];
+ }
+
+ int32_t CreateImmediate(int64_t imm_value) override {
+ return dchecked_integral_cast<int32_t>(imm_value);
+ }
+
+ template <typename Emit>
+ std::string RepeatInsn(size_t count, const std::string& insn, Emit&& emit) {
+ std::string result;
+ for (; count != 0u; --count) {
+ result += insn;
+ emit();
+ }
+ return result;
+ }
+
+ std::string EmitNops(size_t size) {
+ // TODO(riscv64): Support "C" Standard Extension.
+ DCHECK_ALIGNED(size, sizeof(uint32_t));
+ const size_t num_nops = size / sizeof(uint32_t);
+ return RepeatInsn(num_nops, "nop\n", [&]() { __ Nop(); });
+ }
+
+ template <typename EmitLoadConst>
+ void TestLoadConst64(const std::string& test_name,
+ bool can_use_tmp,
+ EmitLoadConst&& emit_load_const) {
+ std::string expected;
+ // Test standard immediates. Unlike other instructions, `Li()` accepts an `int64_t` but
+ // this is unsupported by `CreateImmediate()`, so we cannot use `RepeatRIb()` for these.
+ // Note: This `CreateImmediateValuesBits()` call does not produce any values where
+ // `LoadConst64()` would emit different code from `Li()`.
+ for (int64_t value : CreateImmediateValuesBits(64, /*as_uint=*/ false)) {
+ emit_load_const(A0, value);
+ expected += "li a0, " + std::to_string(value) + "\n";
+ }
+ // Test various registers with a few small values.
+ // (Even Zero is an accepted register, though it does not really load the requested value.)
+ for (XRegister reg : GetRegisters()) {
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(reg);
+ std::string rd = GetRegisterName(reg);
+ emit_load_const(reg, -1);
+ expected += "li " + rd + ", -1\n";
+ emit_load_const(reg, 0);
+ expected += "li " + rd + ", 0\n";
+ emit_load_const(reg, 1);
+ expected += "li " + rd + ", 1\n";
+ }
+ // Test some significant values. Some may just repeat the tests above but other values
+ // show some complex patterns, even exposing a value where clang (and therefore also this
+ // assembler) does not generate the shortest sequence.
+ // For the following values, `LoadConst64()` emits the same code as `Li()`.
+ int64_t test_values1[] = {
+ // Small values, either ADDI, ADDI+SLLI, LUI, or LUI+ADDIW.
+ // The ADDI+SLLI is presumably used to allow shorter code for RV64C.
+ -4097, -4096, -4095, -2176, -2049, -2048, -2047, -1025, -1024, -1023, -2, -1,
+ 0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049, 2176, 4095, 4096, 4097,
+ // Just below std::numeric_limits<int32_t>::min()
+ INT64_C(-0x80000001), // LUI+ADDI
+ INT64_C(-0x80000800), // LUI+ADDI
+ INT64_C(-0x80000801), // LUI+ADDIW+SLLI+ADDI; LUI+ADDI+ADDI would be shorter.
+ INT64_C(-0x80000800123), // LUI+ADDIW+SLLI+ADDI
+ INT64_C(0x0123450000000123), // LUI+SLLI+ADDI
+ INT64_C(-0x7654300000000123), // LUI+SLLI+ADDI
+ INT64_C(0x0fffffffffff0000), // LUI+SRLI
+ INT64_C(0x0ffffffffffff000), // LUI+SRLI
+ INT64_C(0x0ffffffffffff010), // LUI+ADDIW+SRLI
+ INT64_C(0x0fffffffffffff10), // ADDI+SLLI+ADDI; LUI+ADDIW+SRLI would be same length.
+ INT64_C(0x0fffffffffffff80), // ADDI+SRLI
+ INT64_C(0x0ffffffff7ffff80), // LUI+ADDI+SRLI
+ INT64_C(0x0123450000001235), // LUI+SLLI+ADDI+SLLI+ADDI
+ INT64_C(0x0123450000001234), // LUI+SLLI+ADDI+SLLI
+ INT64_C(0x0000000fff808010), // LUI+SLLI+SRLI
+ INT64_C(0x00000000fff80801), // LUI+SLLI+SRLI
+ INT64_C(0x00000000ffffffff), // ADDI+SRLI
+ INT64_C(0x00000001ffffffff), // ADDI+SRLI
+ INT64_C(0x00000003ffffffff), // ADDI+SRLI
+ INT64_C(0x00000000ffc00801), // LUI+ADDIW+SLLI+ADDI
+ INT64_C(0x00000001fffff7fe), // ADDI+SLLI+SRLI
+ };
+ for (int64_t value : test_values1) {
+ emit_load_const(A0, value);
+ expected += "li a0, " + std::to_string(value) + "\n";
+ }
+ // For the following values, `LoadConst64()` emits different code than `Li()`.
+ std::pair<int64_t, const char*> test_values2[] = {
+ // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI
+ // LoadConst: LUI+ADDIW+LUI+ADDIW+SLLI+ADD (using TMP)
+ { INT64_C(0x1234567812345678),
+ "li {reg1}, 0x12345678 / 8\n" // Trailing zero bits in high word are handled by SLLI.
+ "li {reg2}, 0x12345678\n"
+ "slli {reg1}, {reg1}, 32 + 3\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+ { INT64_C(0x1234567887654321),
+ "li {reg1}, 0x12345678 + 1\n" // One higher to compensate for negative TMP.
+ "li {reg2}, 0x87654321 - 0x100000000\n"
+ "slli {reg1}, {reg1}, 32\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+ { INT64_C(-0x1234567887654321),
+ "li {reg1}, -0x12345678 - 1\n" // High 32 bits of the constant.
+ "li {reg2}, 0x100000000 - 0x87654321\n" // Low 32 bits of the constant.
+ "slli {reg1}, {reg1}, 32\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+
+ // Li: LUI+SLLI+ADDI+SLLI+ADDI+SLLI
+ // LoadConst: LUI+LUI+SLLI+ADD (using TMP)
+ { INT64_C(0x1234500012345000),
+ "lui {reg1}, 0x12345\n"
+ "lui {reg2}, 0x12345\n"
+ "slli {reg1}, {reg1}, 44 - 12\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+ { INT64_C(0x0123450012345000),
+ "lui {reg1}, 0x12345\n"
+ "lui {reg2}, 0x12345\n"
+ "slli {reg1}, {reg1}, 40 - 12\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+
+ // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI
+ // LoadConst: LUI+LUI+ADDIW+SLLI+ADD (using TMP)
+ { INT64_C(0x0001234512345678),
+ "lui {reg1}, 0x12345\n"
+ "li {reg2}, 0x12345678\n"
+ "slli {reg1}, {reg1}, 32 - 12\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+ { INT64_C(0x0012345012345678),
+ "lui {reg1}, 0x12345\n"
+ "li {reg2}, 0x12345678\n"
+ "slli {reg1}, {reg1}, 36 - 12\n"
+ "add {reg1}, {reg1}, {reg2}\n" },
+ };
+ for (auto [value, fmt] : test_values2) {
+ emit_load_const(A0, value);
+ if (can_use_tmp) {
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, GetRegisterName(A0), &base);
+ ReplaceReg(REG2_TOKEN, GetRegisterName(TMP), &base);
+ expected += base;
+ } else {
+ expected += "li a0, " + std::to_string(value) + "\n";
+ }
+ }
+
+ DriverStr(expected, test_name);
+ }
+
+ auto GetPrintBcond() {
+ return [](const std::string& cond,
+ [[maybe_unused]] const std::string& opposite_cond,
+ const std::string& args,
+ const std::string& target) {
+ return "b" + cond + args + ", " + target + "\n";
+ };
+ }
+
+ auto GetPrintBcondOppositeAndJ(const std::string& skip_label) {
+ return [=]([[maybe_unused]] const std::string& cond,
+ const std::string& opposite_cond,
+ const std::string& args,
+ const std::string& target) {
+ return "b" + opposite_cond + args + ", " + skip_label + "f\n" +
+ "j " + target + "\n" +
+ skip_label + ":\n";
+ };
+ }
+
+ auto GetPrintBcondOppositeAndTail(const std::string& skip_label, const std::string& base_label) {
+ return [=]([[maybe_unused]] const std::string& cond,
+ const std::string& opposite_cond,
+ const std::string& args,
+ const std::string& target) {
+ return "b" + opposite_cond + args + ", " + skip_label + "f\n" +
+ base_label + ":\n" +
+ "auipc t6, %pcrel_hi(" + target + ")\n" +
+ "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n" +
+ skip_label + ":\n";
+ };
+ }
+
+ // Helper function for basic tests that all branch conditions map to the correct opcodes,
+ // whether with branch expansion (a conditional branch with opposite condition over an
+ // unconditional branch) or without.
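+ // For example, with expansion, `Beq(rs, rt, label)` is emitted as
+ // "bne rs, rt, <skip>; j <label>; <skip>:", matching `GetPrintBcondOppositeAndJ()`.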
+ template <typename PrintBcond>
+ std::string EmitBcondForAllConditions(Riscv64Label* label,
+ const std::string& target,
+ PrintBcond&& print_bcond) {
+ XRegister rs = A0;
+ __ Beqz(rs, label);
+ __ Bnez(rs, label);
+ __ Blez(rs, label);
+ __ Bgez(rs, label);
+ __ Bltz(rs, label);
+ __ Bgtz(rs, label);
+ XRegister rt = A1;
+ __ Beq(rs, rt, label);
+ __ Bne(rs, rt, label);
+ __ Ble(rs, rt, label);
+ __ Bge(rs, rt, label);
+ __ Blt(rs, rt, label);
+ __ Bgt(rs, rt, label);
+ __ Bleu(rs, rt, label);
+ __ Bgeu(rs, rt, label);
+ __ Bltu(rs, rt, label);
+ __ Bgtu(rs, rt, label);
+
+ return
+ print_bcond("eq", "ne", "z a0", target) +
+ print_bcond("ne", "eq", "z a0", target) +
+ print_bcond("le", "gt", "z a0", target) +
+ print_bcond("ge", "lt", "z a0", target) +
+ print_bcond("lt", "ge", "z a0", target) +
+ print_bcond("gt", "le", "z a0", target) +
+ print_bcond("eq", "ne", " a0, a1", target) +
+ print_bcond("ne", "eq", " a0, a1", target) +
+ print_bcond("le", "gt", " a0, a1", target) +
+ print_bcond("ge", "lt", " a0, a1", target) +
+ print_bcond("lt", "ge", " a0, a1", target) +
+ print_bcond("gt", "le", " a0, a1", target) +
+ print_bcond("leu", "gtu", " a0, a1", target) +
+ print_bcond("geu", "ltu", " a0, a1", target) +
+ print_bcond("ltu", "geu", " a0, a1", target) +
+ print_bcond("gtu", "leu", " a0, a1", target);
+ }
+
+ // Test Bcond for forward branches with all conditions.
+ // The gap must be such that either all branches expand, or none does.
+ template <typename PrintBcond>
+ void TestBcondForward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& target_label,
+ PrintBcond&& print_bcond) {
+ std::string expected;
+ Riscv64Label label;
+ expected += EmitBcondForAllConditions(&label, target_label + "f", print_bcond);
+ expected += EmitNops(gap_size);
+ __ Bind(&label);
+ expected += target_label + ":\n";
+ DriverStr(expected, test_name);
+ }
+
+ // Test Bcond for backward branches with all conditions.
+ // The gap must be such that either all branches expand, or none does.
+ template <typename PrintBcond>
+ void TestBcondBackward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& target_label,
+ PrintBcond&& print_bcond) {
+ std::string expected;
+ Riscv64Label label;
+ __ Bind(&label);
+ expected += target_label + ":\n";
+ expected += EmitNops(gap_size);
+ expected += EmitBcondForAllConditions(&label, target_label + "b", print_bcond);
+ DriverStr(expected, test_name);
+ }
+
+ size_t MaxOffset13BackwardDistance() {
+ return 4 * KB;
+ }
+
+ size_t MaxOffset13ForwardDistance() {
+ // TODO(riscv64): Support "C" Standard Extension, max forward distance 4KiB - 2.
+ return 4 * KB - 4;
+ }
+
+ size_t MaxOffset21BackwardDistance() {
+ return 1 * MB;
+ }
+
+ size_t MaxOffset21ForwardDistance() {
+ // TODO(riscv64): Support "C" Standard Extension, max forward distance 1MiB - 2.
+ return 1 * MB - 4;
+ }
+
+ template <typename PrintBcond>
+ void TestBeqA0A1Forward(const std::string& test_name,
+ size_t nops_size,
+ const std::string& target_label,
+ PrintBcond&& print_bcond) {
+ std::string expected;
+ Riscv64Label label;
+ __ Beq(A0, A1, &label);
+ expected += print_bcond("eq", "ne", " a0, a1", target_label + "f");
+ expected += EmitNops(nops_size);
+ __ Bind(&label);
+ expected += target_label + ":\n";
+ DriverStr(expected, test_name);
+ }
+
+ template <typename PrintBcond>
+ void TestBeqA0A1Backward(const std::string& test_name,
+ size_t nops_size,
+ const std::string& target_label,
+ PrintBcond&& print_bcond) {
+ std::string expected;
+ Riscv64Label label;
+ __ Bind(&label);
+ expected += target_label + ":\n";
+ expected += EmitNops(nops_size);
+ __ Beq(A0, A1, &label);
+ expected += print_bcond("eq", "ne", " a0, a1", target_label + "b");
+ DriverStr(expected, test_name);
+ }
+
+ // Test a branch setup where expanding one branch causes expanding another branch
+ // which causes expanding another branch, etc. The argument `cascade` determines
+ // whether we push the first branch to expand, or not.
+ template <typename PrintBcond>
+ void TestBeqA0A1MaybeCascade(const std::string& test_name,
+ bool cascade,
+ PrintBcond&& print_bcond) {
+ const size_t kNumBeqs = MaxOffset13ForwardDistance() / sizeof(uint32_t) / 2u;
+ auto label_name = [](size_t i) { return ".L" + std::to_string(i); };
+
+ std::string expected;
+ std::vector<Riscv64Label> labels(kNumBeqs);
+ for (size_t i = 0; i != kNumBeqs; ++i) {
+ __ Beq(A0, A1, &labels[i]);
+ expected += print_bcond("eq", "ne", " a0, a1", label_name(i));
+ }
+ if (cascade) {
+ expected += EmitNops(sizeof(uint32_t));
+ }
+ for (size_t i = 0; i != kNumBeqs; ++i) {
+ expected += EmitNops(2 * sizeof(uint32_t));
+ __ Bind(&labels[i]);
+ expected += label_name(i) + ":\n";
+ }
+ DriverStr(expected, test_name);
+ }
+
+ auto GetPrintJalRd() {
+ return [=](XRegister rd, const std::string& target) {
+ std::string rd_name = GetRegisterName(rd);
+ return "jal " + rd_name + ", " + target + "\n";
+ };
+ }
+
+ auto GetPrintCallRd(const std::string& base_label) {
+ return [=](XRegister rd, const std::string& target) {
+ std::string rd_name = GetRegisterName(rd);
+ std::string temp_name = (rd != Zero) ? rd_name : GetRegisterName(TMP);
+ return base_label + ":\n" +
+ "auipc " + temp_name + ", %pcrel_hi(" + target + ")\n" +
+ "jalr " + rd_name + ", %pcrel_lo(" + base_label + "b)(" + temp_name + ")\n";
+ };
+ }
+
+ template <typename PrintJalRd>
+ void TestJalRdForward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ PrintJalRd&& print_jalrd) {
+ std::string expected;
+ Riscv64Label label;
+ for (XRegister reg : GetRegisters()) {
+ __ Jal(reg, &label);
+ expected += print_jalrd(reg, label_name + "f");
+ }
+ expected += EmitNops(gap_size);
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ DriverStr(expected, test_name);
+ }
+
+ template <typename PrintJalRd>
+ void TestJalRdBackward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ PrintJalRd&& print_jalrd) {
+ std::string expected;
+ Riscv64Label label;
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ expected += EmitNops(gap_size);
+ for (XRegister reg : GetRegisters()) {
+ __ Jal(reg, &label);
+ expected += print_jalrd(reg, label_name + "b");
+ }
+ DriverStr(expected, test_name);
+ }
+
+ auto GetEmitJ() {
+ return [=](Riscv64Label* label) { __ J(label); };
+ }
+
+ auto GetEmitJal() {
+ return [=](Riscv64Label* label) { __ Jal(label); };
+ }
+
+ auto GetPrintJ() {
+ return [=](const std::string& target) {
+ return "j " + target + "\n";
+ };
+ }
+
+ auto GetPrintJal() {
+ return [=](const std::string& target) {
+ return "jal " + target + "\n";
+ };
+ }
+
+ auto GetPrintTail(const std::string& base_label) {
+ return [=](const std::string& target) {
+ return base_label + ":\n" +
+ "auipc t6, %pcrel_hi(" + target + ")\n" +
+ "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n";
+ };
+ }
+
+ auto GetPrintCall(const std::string& base_label) {
+ return [=](const std::string& target) {
+ return base_label + ":\n" +
+ "auipc ra, %pcrel_hi(" + target + ")\n" +
+ "jalr ra, %pcrel_lo(" + base_label + "b)(ra)\n";
+ };
+ }
+
+ template <typename EmitBuncond, typename PrintBuncond>
+ void TestBuncondForward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ EmitBuncond&& emit_buncond,
+ PrintBuncond&& print_buncond) {
+ std::string expected;
+ Riscv64Label label;
+ emit_buncond(&label);
+ expected += print_buncond(label_name + "f");
+ expected += EmitNops(gap_size);
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ DriverStr(expected, test_name);
+ }
+
+ template <typename EmitBuncond, typename PrintBuncond>
+ void TestBuncondBackward(const std::string& test_name,
+ size_t gap_size,
+ const std::string& label_name,
+ EmitBuncond&& emit_buncond,
+ PrintBuncond&& print_buncond) {
+ std::string expected;
+ Riscv64Label label;
+ __ Bind(&label);
+ expected += label_name + ":\n";
+ expected += EmitNops(gap_size);
+ emit_buncond(&label);
+ expected += print_buncond(label_name + "b");
+ DriverStr(expected, test_name);
+ }
+
+ template <typename EmitOp>
+ void TestAddConst(const std::string& test_name,
+ size_t bits,
+ const std::string& suffix,
+ EmitOp&& emit_op) {
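+ // Immediates that fit in 12 bits are emitted as a single ADDI; slightly larger "simple"
+ // values use two ADDIs through a scratch register (adjusting by 0x7ff or -0x800 first);
+ // all remaining values are materialized with LI into the scratch register and then added.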
+ int64_t kImm12s[] = {
+ 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff,
+ -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800,
+ };
+ int64_t kSimplePositiveValues[] = {
+ 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff7, 0xff8, 0xffb, 0xffc, 0xffd, 0xffe,
+ };
+ int64_t kSimpleNegativeValues[] = {
+ -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000,
+ };
+ std::vector<int64_t> large_values = CreateImmediateValuesBits(bits, /*as_uint=*/ false);
+ auto kept_end = std::remove_if(large_values.begin(),
+ large_values.end(),
+ [](int64_t value) { return IsInt<13>(value); });
+ large_values.erase(kept_end, large_values.end());
+ large_values.push_back(0xfff);
+
+ std::string expected;
+ for (XRegister rd : GetRegisters()) {
+ std::string rd_name = GetRegisterName(rd);
+ std::string addi_rd = ART_FORMAT("addi{} {}, ", suffix, rd_name);
+ std::string add_rd = ART_FORMAT("add{} {}, ", suffix, rd_name);
+ for (XRegister rs1 : GetRegisters()) {
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(rs1);
+ srs.ExcludeXRegister(rd);
+
+ std::string rs1_name = GetRegisterName(rs1);
+ std::string tmp_name = GetRegisterName((rs1 != TMP) ? TMP : TMP2);
+ std::string addi_tmp = ART_FORMAT("addi{} {}, ", suffix, tmp_name);
+
+ for (int64_t imm : kImm12s) {
+ emit_op(rd, rs1, imm);
+ expected += ART_FORMAT("{}{}, {}\n", addi_rd, rs1_name, std::to_string(imm));
+ }
+
+ auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) {
+ for (int64_t imm : imms) {
+ emit_op(rd, rs1, imm);
+ expected += ART_FORMAT("{}{}, {}\n", addi_tmp, rs1_name, std::to_string(adjustment));
+ expected +=
+ ART_FORMAT("{}{}, {}\n", addi_rd, tmp_name, std::to_string(imm - adjustment));
+ }
+ };
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveValues), 0x7ff);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeValues), -0x800);
+
+ for (int64_t imm : large_values) {
+ emit_op(rd, rs1, imm);
+ expected += ART_FORMAT("li {}, {}\n", tmp_name, std::to_string(imm));
+ expected += ART_FORMAT("{}{}, {}\n", add_rd, rs1_name, tmp_name);
+ }
+ }
+ }
+ DriverStr(expected, test_name);
+ }
+
+ template <typename GetTemp, typename EmitOp>
+ std::string RepeatLoadStoreArbitraryOffset(const std::string& head,
+ GetTemp&& get_temp,
+ EmitOp&& emit_op) {
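+ // Offsets that fit in 12 bits are used directly; slightly larger "simple" offsets get a
+ // single ADDI adjustment chosen to preserve the access alignment; "split" offsets use
+ // LUI+ADD with a small residual offset in the access; offsets close to 2^31 use
+ // LUI 0x80000, ADDIW and ADD with a zero offset in the access.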
+ int64_t kImm12s[] = {
+ 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff,
+ -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800,
+ };
+ int64_t kSimplePositiveOffsetsAlign8[] = {
+ 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff4, 0xff6, 0xff7
+ };
+ int64_t kSimplePositiveOffsetsAlign4[] = {
+ 0xff8, 0xff9, 0xffa, 0xffb
+ };
+ int64_t kSimplePositiveOffsetsAlign2[] = {
+ 0xffc, 0xffd
+ };
+ int64_t kSimplePositiveOffsetsNoAlign[] = {
+ 0xffe
+ };
+ int64_t kSimpleNegativeOffsets[] = {
+ -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000,
+ };
+ int64_t kSplitOffsets[] = {
+ 0xfff, 0x1000, 0x1001, 0x17ff, 0x1800, 0x1fff, 0x2000, 0x2001, 0x27ff, 0x2800,
+ 0x7fffe7ff, 0x7fffe800, 0x7fffefff, 0x7ffff000, 0x7ffff001, 0x7ffff7ff,
+ -0x1001, -0x1002, -0x17ff, -0x1800, -0x1801, -0x2000, -0x2001, -0x2800, -0x2801,
+ -0x7ffff000, -0x7ffff001, -0x7ffff800, -0x7ffff801, -0x7fffffff, -0x80000000,
+ };
+ int64_t kSpecialOffsets[] = {
+ 0x7ffff800, 0x7ffff801, 0x7ffffffe, 0x7fffffff
+ };
+
+ std::string expected;
+ for (XRegister rs1 : GetRegisters()) {
+ XRegister tmp = get_temp(rs1);
+ if (tmp == kNoXRegister) {
+ continue; // Unsupported register combination.
+ }
+ std::string tmp_name = GetRegisterName(tmp);
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(rs1);
+ std::string rs1_name = GetRegisterName(rs1);
+
+ for (int64_t imm : kImm12s) {
+ emit_op(rs1, imm);
+ expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm), rs1_name);
+ }
+
+ auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) {
+ for (int64_t imm : imms) {
+ emit_op(rs1, imm);
+ expected +=
+ ART_FORMAT("addi {}, {}, {}\n", tmp_name, rs1_name, std::to_string(adjustment));
+ expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm - adjustment), tmp_name);
+ }
+ };
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign8), 0x7f8);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign4), 0x7fc);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign2), 0x7fe);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsNoAlign), 0x7ff);
+ emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeOffsets), -0x800);
+
+ for (int64_t imm : kSplitOffsets) {
+ emit_op(rs1, imm);
+ uint32_t imm20 = ((imm >> 12) + ((imm >> 11) & 1)) & 0xfffff;
+ int32_t small_offset = (imm & 0xfff) - ((imm & 0x800) << 1);
+ expected += ART_FORMAT("lui {}, {}\n", tmp_name, std::to_string(imm20));
+ expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name);
+ expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(small_offset), tmp_name);
+ }
+
+ for (int64_t imm : kSpecialOffsets) {
+ emit_op(rs1, imm);
+ expected += ART_FORMAT("lui {}, 0x80000\n", tmp_name);
+ expected +=
+ ART_FORMAT("addiw {}, {}, {}\n", tmp_name, tmp_name, std::to_string(imm - 0x80000000));
+ expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name);
+ expected += ART_FORMAT("{}, ({})\n", head, tmp_name);
+ }
+ }
+ return expected;
+ }
+
+ void TestLoadStoreArbitraryOffset(const std::string& test_name,
+ const std::string& insn,
+ void (Riscv64Assembler::*fn)(XRegister, XRegister, int32_t),
+ bool is_store) {
+ std::string expected;
+ for (XRegister rd : GetRegisters()) {
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(rd);
+ auto get_temp = [&](XRegister rs1) {
+ if (is_store) {
+ return (rs1 != TMP && rd != TMP)
+ ? TMP
+ : (rs1 != TMP2 && rd != TMP2) ? TMP2 : kNoXRegister;
+ } else {
+ return rs1 != TMP ? TMP : TMP2;
+ }
+ };
+ expected += RepeatLoadStoreArbitraryOffset(
+ insn + " " + GetRegisterName(rd),
+ get_temp,
+ [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); });
+ }
+ DriverStr(expected, test_name);
+ }
+
+ void TestFPLoadStoreArbitraryOffset(const std::string& test_name,
+ const std::string& insn,
+ void (Riscv64Assembler::*fn)(FRegister, XRegister, int32_t)) {
+ std::string expected;
+ for (FRegister rd : GetFPRegisters()) {
+ expected += RepeatLoadStoreArbitraryOffset(
+ insn + " " + GetFPRegName(rd),
+ [&](XRegister rs1) { return rs1 != TMP ? TMP : TMP2; },
+ [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); });
+ }
+ DriverStr(expected, test_name);
+ }
+
+ void TestLoadLiteral(const std::string& test_name, bool with_padding_for_long) {
+ std::string expected;
+ Literal* narrow_literal = __ NewLiteral<uint32_t>(0x12345678);
+ Literal* wide_literal = __ NewLiteral<uint64_t>(0x1234567887654321);
+ auto print_load = [&](const std::string& load, XRegister rd, const std::string& label) {
+ std::string rd_name = GetRegisterName(rd);
+ expected += "1:\n"
+ "auipc " + rd_name + ", %pcrel_hi(" + label + "f)\n" +
+ load + " " + rd_name + ", %pcrel_lo(1b)(" + rd_name + ")\n";
+ };
+ for (XRegister reg : GetRegisters()) {
+ if (reg != Zero) {
+ __ Loadw(reg, narrow_literal);
+ print_load("lw", reg, "2");
+ __ Loadwu(reg, narrow_literal);
+ print_load("lwu", reg, "2");
+ __ Loadd(reg, wide_literal);
+ print_load("ld", reg, "3");
+ }
+ }
+ std::string tmp = GetRegisterName(TMP);
+ auto print_fp_load = [&](const std::string& load, FRegister rd, const std::string& label) {
+ std::string rd_name = GetFPRegName(rd);
+ expected += "1:\n"
+ "auipc " + tmp + ", %pcrel_hi(" + label + "f)\n" +
+ load + " " + rd_name + ", %pcrel_lo(1b)(" + tmp + ")\n";
+ };
+ for (FRegister freg : GetFPRegisters()) {
+ __ FLoadw(freg, narrow_literal);
+ print_fp_load("flw", freg, "2");
+ __ FLoadd(freg, wide_literal);
+ print_fp_load("fld", freg, "3");
+ }
+ // Each literal load above emits 8 bytes of code, and the narrow literal itself occupies
+ // 4 bytes. If we do not add another 4-byte instruction, we end up with alignment padding
+ // before the 8-byte-aligned long literal.
+ expected += EmitNops(with_padding_for_long ? 0u : sizeof(uint32_t));
+ expected += "2:\n"
+ ".4byte 0x12345678\n" +
+ std::string(with_padding_for_long ? ".4byte 0\n" : "") +
+ "3:\n"
+ ".8byte 0x1234567887654321\n";
+ DriverStr(expected, test_name);
+ }
+
+ std::string RepeatFFFFRoundingMode(
+ void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FRegister, FPRoundingMode),
+ const std::string& fmt) {
+ CHECK(f != nullptr);
+ std::string str;
+ for (FRegister reg1 : GetFPRegisters()) {
+ for (FRegister reg2 : GetFPRegisters()) {
+ for (FRegister reg3 : GetFPRegisters()) {
+ for (FRegister reg4 : GetFPRegisters()) {
+ for (FPRoundingMode rm : kRoundingModes) {
+ (GetAssembler()->*f)(reg1, reg2, reg3, reg4, rm);
+
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base);
+ ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base);
+ ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base);
+ ReplaceReg(REG4_TOKEN, GetFPRegName(reg4), &base);
+ ReplaceRoundingMode(rm, &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ }
+ }
+ return str;
+ }
+
+ std::string RepeatFFFRoundingMode(
+ void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FPRoundingMode),
+ const std::string& fmt) {
+ CHECK(f != nullptr);
+ std::string str;
+ for (FRegister reg1 : GetFPRegisters()) {
+ for (FRegister reg2 : GetFPRegisters()) {
+ for (FRegister reg3 : GetFPRegisters()) {
+ for (FPRoundingMode rm : kRoundingModes) {
+ (GetAssembler()->*f)(reg1, reg2, reg3, rm);
+
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base);
+ ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base);
+ ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base);
+ ReplaceRoundingMode(rm, &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ }
+ return str;
+ }
+
+ template <typename Reg1, typename Reg2>
+ std::string RepeatTemplatedRegistersRoundingMode(
+ void (Riscv64Assembler::*f)(Reg1, Reg2, FPRoundingMode),
+ ArrayRef<const Reg1> reg1_registers,
+ ArrayRef<const Reg2> reg2_registers,
+ std::string (Base::*GetName1)(const Reg1&),
+ std::string (Base::*GetName2)(const Reg2&),
+ const std::string& fmt) {
+ CHECK(f != nullptr);
+ std::string str;
+ for (Reg1 reg1 : reg1_registers) {
+ for (Reg2 reg2 : reg2_registers) {
+ for (FPRoundingMode rm : kRoundingModes) {
+ (GetAssembler()->*f)(reg1, reg2, rm);
+
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base);
+ ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base);
+ ReplaceRoundingMode(rm, &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ return str;
+ }
+
+ std::string RepeatFFRoundingMode(
+ void (Riscv64Assembler::*f)(FRegister, FRegister, FPRoundingMode),
+ const std::string& fmt) {
+ return RepeatTemplatedRegistersRoundingMode(f,
+ GetFPRegisters(),
+ GetFPRegisters(),
+ &AssemblerRISCV64Test::GetFPRegName,
+ &AssemblerRISCV64Test::GetFPRegName,
+ fmt);
+ }
+
+ std::string RepeatrFRoundingMode(
+ void (Riscv64Assembler::*f)(XRegister, FRegister, FPRoundingMode),
+ const std::string& fmt) {
+ return RepeatTemplatedRegistersRoundingMode(f,
+ GetRegisters(),
+ GetFPRegisters(),
+ &Base::GetSecondaryRegisterName,
+ &AssemblerRISCV64Test::GetFPRegName,
+ fmt);
+ }
+
+ std::string RepeatFrRoundingMode(
+ void (Riscv64Assembler::*f)(FRegister, XRegister, FPRoundingMode),
+ const std::string& fmt) {
+ return RepeatTemplatedRegistersRoundingMode(f,
+ GetFPRegisters(),
+ GetRegisters(),
+ &AssemblerRISCV64Test::GetFPRegName,
+ &Base::GetSecondaryRegisterName,
+ fmt);
+ }
+
+ template <typename InvalidAqRl>
+ std::string RepeatRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, AqRl),
+ const std::string& fmt,
+ InvalidAqRl&& invalid_aqrl) {
+ CHECK(f != nullptr);
+ std::string str;
+ for (XRegister reg1 : GetRegisters()) {
+ for (XRegister reg2 : GetRegisters()) {
+ for (AqRl aqrl : kAqRls) {
+ if (invalid_aqrl(aqrl)) {
+ continue;
+ }
+ (GetAssembler()->*f)(reg1, reg2, aqrl);
+
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base);
+ ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base);
+ ReplaceAqRl(aqrl, &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ return str;
+ }
+
+ template <typename InvalidAqRl>
+ std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl),
+ const std::string& fmt,
+ InvalidAqRl&& invalid_aqrl) {
+ CHECK(f != nullptr);
+ std::string str;
+ for (XRegister reg1 : GetRegisters()) {
+ for (XRegister reg2 : GetRegisters()) {
+ for (XRegister reg3 : GetRegisters()) {
+ for (AqRl aqrl : kAqRls) {
+ if (invalid_aqrl(aqrl)) {
+ continue;
+ }
+ (GetAssembler()->*f)(reg1, reg2, reg3, aqrl);
+
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base);
+ ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base);
+ ReplaceReg(REG3_TOKEN, GetRegisterName(reg3), &base);
+ ReplaceAqRl(aqrl, &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ }
+ return str;
+ }
+
+ std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl),
+ const std::string& fmt) {
+ return RepeatRRRAqRl(f, fmt, [](AqRl) { return false; });
+ }
+
+ std::string RepeatCsrrX(void (Riscv64Assembler::*f)(XRegister, uint32_t, XRegister),
+ const std::string& fmt) {
+ CHECK(f != nullptr);
+ std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true);
+ std::string str;
+ for (XRegister reg1 : GetRegisters()) {
+ for (int64_t csr : csrs) {
+ for (XRegister reg2 : GetRegisters()) {
+ (GetAssembler()->*f)(reg1, dchecked_integral_cast<uint32_t>(csr), reg2);
+
+ std::string base = fmt;
+ ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base);
+ ReplaceCsrrImm(CSR_TOKEN, csr, &base);
+ ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ return str;
+ }
+
+ std::string RepeatCsrrXi(void (Riscv64Assembler::*f)(XRegister, uint32_t, uint32_t),
+ const std::string& fmt) {
+ CHECK(f != nullptr);
+ std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true);
+ std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true);
+ std::string str;
+ for (XRegister reg : GetRegisters()) {
+ for (int64_t csr : csrs) {
+ for (int64_t uimm : uimms) {
+ (GetAssembler()->*f)(
+ reg, dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm));
+
+ std::string base = fmt;
+ ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base);
+ ReplaceCsrrImm(CSR_TOKEN, csr, &base);
+ ReplaceCsrrImm(UIMM_TOKEN, uimm, &base);
+ str += base;
+ str += "\n";
+ }
+ }
+ }
+ return str;
+ }
+
+ template <typename EmitCssrX>
+ void TestCsrrXMacro(const std::string& test_name,
+ const std::string& fmt,
+ EmitCssrX&& emit_csrrx) {
+ std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true);
+ std::string expected;
+ for (XRegister reg : GetRegisters()) {
+ for (int64_t csr : csrs) {
+ emit_csrrx(dchecked_integral_cast<uint32_t>(csr), reg);
+
+ std::string base = fmt;
+ ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base);
+ ReplaceCsrrImm(CSR_TOKEN, csr, &base);
+ expected += base;
+ expected += "\n";
+ }
+ }
+ DriverStr(expected, test_name);
+ }
+
+ template <typename EmitCssrXi>
+ void TestCsrrXiMacro(const std::string& test_name,
+ const std::string& fmt,
+ EmitCssrXi&& emit_csrrxi) {
+ std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true);
+ std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true);
+ std::string expected;
+ for (int64_t csr : csrs) {
+ for (int64_t uimm : uimms) {
+ emit_csrrxi(dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm));
+
+ std::string base = fmt;
+ ReplaceCsrrImm(CSR_TOKEN, csr, &base);
+ ReplaceCsrrImm(UIMM_TOKEN, uimm, &base);
+ expected += base;
+ expected += "\n";
+ }
+ }
+ DriverStr(expected, test_name);
+ }
+
+ private:
+ static constexpr const char* RM_TOKEN = "{rm}";
+ static constexpr const char* AQRL_TOKEN = "{aqrl}";
+ static constexpr const char* CSR_TOKEN = "{csr}";
+ static constexpr const char* UIMM_TOKEN = "{uimm}";
+
+ static constexpr AqRl kAqRls[] = { AqRl::kNone, AqRl::kRelease, AqRl::kAcquire, AqRl::kAqRl };
+
+ static constexpr FPRoundingMode kRoundingModes[] = {
+ FPRoundingMode::kRNE,
+ FPRoundingMode::kRTZ,
+ FPRoundingMode::kRDN,
+ FPRoundingMode::kRUP,
+ FPRoundingMode::kRMM,
+ FPRoundingMode::kDYN
+ };
+
+ void ReplaceRoundingMode(FPRoundingMode rm, /*inout*/ std::string* str) {
+ const char* replacement;
+ switch (rm) {
+ case FPRoundingMode::kRNE:
+ replacement = "rne";
+ break;
+ case FPRoundingMode::kRTZ:
+ replacement = "rtz";
+ break;
+ case FPRoundingMode::kRDN:
+ replacement = "rdn";
+ break;
+ case FPRoundingMode::kRUP:
+ replacement = "rup";
+ break;
+ case FPRoundingMode::kRMM:
+ replacement = "rmm";
+ break;
+ case FPRoundingMode::kDYN:
+ replacement = "dyn";
+ break;
+ default:
+ LOG(FATAL) << "Unexpected value for rm: " << enum_cast<uint32_t>(rm);
+ UNREACHABLE();
+ }
+ size_t rm_index = str->find(RM_TOKEN);
+ EXPECT_NE(rm_index, std::string::npos);
+ if (rm_index != std::string::npos) {
+ str->replace(rm_index, ConstexprStrLen(RM_TOKEN), replacement);
+ }
+ }
+
+ void ReplaceAqRl(AqRl aqrl, /*inout*/ std::string* str) {
+ const char* replacement;
+ switch (aqrl) {
+ case AqRl::kNone:
+ replacement = "";
+ break;
+ case AqRl::kRelease:
+ replacement = ".rl";
+ break;
+ case AqRl::kAcquire:
+ replacement = ".aq";
+ break;
+ case AqRl::kAqRl:
+ replacement = ".aqrl";
+ break;
+ default:
+ LOG(FATAL) << "Unexpected value for `aqrl`: " << enum_cast<uint32_t>(aqrl);
+ UNREACHABLE();
+ }
+ size_t aqrl_index = str->find(AQRL_TOKEN);
+ EXPECT_NE(aqrl_index, std::string::npos);
+ if (aqrl_index != std::string::npos) {
+ str->replace(aqrl_index, ConstexprStrLen(AQRL_TOKEN), replacement);
+ }
+ }
+
+ static void ReplaceCsrrImm(const std::string& imm_token,
+ int64_t imm,
+ /*inout*/ std::string* str) {
+ size_t imm_index = str->find(imm_token);
+ EXPECT_NE(imm_index, std::string::npos);
+ if (imm_index != std::string::npos) {
+ str->replace(imm_index, imm_token.length(), std::to_string(imm));
+ }
+ }
+
+ std::map<XRegister, std::string, RISCV64CpuRegisterCompare> secondary_register_names_;
+
+ std::unique_ptr<const Riscv64InstructionSetFeatures> instruction_set_features_;
+ bool use_simple_march_ = false;
+};
+
+TEST_F(AssemblerRISCV64Test, Toolchain) { EXPECT_TRUE(CheckTools()); }
+
+TEST_F(AssemblerRISCV64Test, Lui) {
+ DriverStr(RepeatRIb(&Riscv64Assembler::Lui, 20, "lui {reg}, {imm}"), "Lui");
+}
+
+TEST_F(AssemblerRISCV64Test, Auipc) {
+ DriverStr(RepeatRIb(&Riscv64Assembler::Auipc, 20, "auipc {reg}, {imm}"), "Auipc");
+}
+
+TEST_F(AssemblerRISCV64Test, Jal) {
+ // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Jal, -19, 2, "jal {reg}, {imm}\n"), "Jal");
+}
+
+TEST_F(AssemblerRISCV64Test, Jalr) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Jalr, -12, "jalr {reg1}, {reg2}, {imm}\n"), "Jalr");
+}
+
+TEST_F(AssemblerRISCV64Test, Beq) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Beq, -11, 2, "beq {reg1}, {reg2}, {imm}\n"), "Beq");
+}
+
+TEST_F(AssemblerRISCV64Test, Bne) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bne, -11, 2, "bne {reg1}, {reg2}, {imm}\n"), "Bne");
+}
+
+TEST_F(AssemblerRISCV64Test, Blt) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Blt, -11, 2, "blt {reg1}, {reg2}, {imm}\n"), "Blt");
+}
+
+TEST_F(AssemblerRISCV64Test, Bge) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bge, -11, 2, "bge {reg1}, {reg2}, {imm}\n"), "Bge");
+}
+
+TEST_F(AssemblerRISCV64Test, Bltu) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bltu, -11, 2, "bltu {reg1}, {reg2}, {imm}\n"), "Bltu");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgeu) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgeu, -11, 2, "bgeu {reg1}, {reg2}, {imm}\n"), "Bgeu");
+}
+
+TEST_F(AssemblerRISCV64Test, Lb) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Lb, -12, "lb {reg1}, {imm}({reg2})"), "Lb");
+}
+
+TEST_F(AssemblerRISCV64Test, Lh) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Lh, -12, "lh {reg1}, {imm}({reg2})"), "Lh");
+}
+
+TEST_F(AssemblerRISCV64Test, Lw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Lw, -12, "lw {reg1}, {imm}({reg2})"), "Lw");
+}
+
+TEST_F(AssemblerRISCV64Test, Ld) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Ld, -12, "ld {reg1}, {imm}({reg2})"), "Ld");
+}
+
+TEST_F(AssemblerRISCV64Test, Lbu) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Lbu, -12, "lbu {reg1}, {imm}({reg2})"), "Lbu");
+}
+
+TEST_F(AssemblerRISCV64Test, Lhu) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Lhu, -12, "lhu {reg1}, {imm}({reg2})"), "Lhu");
+}
+
+TEST_F(AssemblerRISCV64Test, Lwu) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Lwu, -12, "lwu {reg1}, {imm}({reg2})"), "Lwu");
+}
+
+TEST_F(AssemblerRISCV64Test, Sb) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Sb, -12, "sb {reg1}, {imm}({reg2})"), "Sb");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Sh, -12, "sh {reg1}, {imm}({reg2})"), "Sh");
+}
+
+TEST_F(AssemblerRISCV64Test, Sw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Sw, -12, "sw {reg1}, {imm}({reg2})"), "Sw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sd) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Sd, -12, "sd {reg1}, {imm}({reg2})"), "Sd");
+}
+
+TEST_F(AssemblerRISCV64Test, Addi) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Addi, -12, "addi {reg1}, {reg2}, {imm}"), "Addi");
+}
+
+TEST_F(AssemblerRISCV64Test, Slti) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Slti, -12, "slti {reg1}, {reg2}, {imm}"), "Slti");
+}
+
+TEST_F(AssemblerRISCV64Test, Sltiu) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Sltiu, -12, "sltiu {reg1}, {reg2}, {imm}"), "Sltiu");
+}
+
+TEST_F(AssemblerRISCV64Test, Xori) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Xori, 11, "xori {reg1}, {reg2}, {imm}"), "Xori");
+}
+
+TEST_F(AssemblerRISCV64Test, Ori) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Ori, -12, "ori {reg1}, {reg2}, {imm}"), "Ori");
+}
+
+TEST_F(AssemblerRISCV64Test, Andi) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Andi, -12, "andi {reg1}, {reg2}, {imm}"), "Andi");
+}
+
+TEST_F(AssemblerRISCV64Test, Slli) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Slli, 6, "slli {reg1}, {reg2}, {imm}"), "Slli");
+}
+
+TEST_F(AssemblerRISCV64Test, Srli) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Srli, 6, "srli {reg1}, {reg2}, {imm}"), "Srli");
+}
+
+TEST_F(AssemblerRISCV64Test, Srai) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Srai, 6, "srai {reg1}, {reg2}, {imm}"), "Srai");
+}
+
+TEST_F(AssemblerRISCV64Test, Add) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Add, "add {reg1}, {reg2}, {reg3}"), "Add");
+}
+
+TEST_F(AssemblerRISCV64Test, Sub) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sub, "sub {reg1}, {reg2}, {reg3}"), "Sub");
+}
+
+TEST_F(AssemblerRISCV64Test, Slt) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Slt, "slt {reg1}, {reg2}, {reg3}"), "Slt");
+}
+
+TEST_F(AssemblerRISCV64Test, Sltu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sltu, "sltu {reg1}, {reg2}, {reg3}"), "Sltu");
+}
+
+TEST_F(AssemblerRISCV64Test, Xor) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Xor, "xor {reg1}, {reg2}, {reg3}"), "Xor");
+}
+
+TEST_F(AssemblerRISCV64Test, Or) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Or, "or {reg1}, {reg2}, {reg3}"), "Or");
+}
+
+TEST_F(AssemblerRISCV64Test, And) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::And, "and {reg1}, {reg2}, {reg3}"), "And");
+}
+
+TEST_F(AssemblerRISCV64Test, Sll) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sll, "sll {reg1}, {reg2}, {reg3}"), "Sll");
+}
+
+TEST_F(AssemblerRISCV64Test, Srl) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Srl, "srl {reg1}, {reg2}, {reg3}"), "Srl");
+}
+
+TEST_F(AssemblerRISCV64Test, Sra) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sra, "sra {reg1}, {reg2}, {reg3}"), "Sra");
+}
+
+TEST_F(AssemblerRISCV64Test, Addiw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Addiw, -12, "addiw {reg1}, {reg2}, {imm}"), "Addiw");
+}
+
+TEST_F(AssemblerRISCV64Test, Slliw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Slliw, 5, "slliw {reg1}, {reg2}, {imm}"), "Slliw");
+}
+
+TEST_F(AssemblerRISCV64Test, Srliw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Srliw, 5, "srliw {reg1}, {reg2}, {imm}"), "Srliw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sraiw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Sraiw, 5, "sraiw {reg1}, {reg2}, {imm}"), "Sraiw");
+}
+
+TEST_F(AssemblerRISCV64Test, Addw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Addw, "addw {reg1}, {reg2}, {reg3}"), "Addw");
+}
+
+TEST_F(AssemblerRISCV64Test, Subw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Subw, "subw {reg1}, {reg2}, {reg3}"), "Subw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sllw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sllw, "sllw {reg1}, {reg2}, {reg3}"), "Sllw");
+}
+
+TEST_F(AssemblerRISCV64Test, Srlw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Srlw, "srlw {reg1}, {reg2}, {reg3}"), "Srlw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sraw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sraw, "sraw {reg1}, {reg2}, {reg3}"), "Sraw");
+}
+
+TEST_F(AssemblerRISCV64Test, Ecall) {
+ __ Ecall();
+ DriverStr("ecall\n", "Ecall");
+}
+
+TEST_F(AssemblerRISCV64Test, Ebreak) {
+ __ Ebreak();
+ DriverStr("ebreak\n", "Ebreak");
+}
+
+TEST_F(AssemblerRISCV64Test, Fence) {
+ auto get_fence_type_string = [](uint32_t fence_type) {
+ CHECK_LE(fence_type, 0xfu);
+ std::string result;
+ if ((fence_type & kFenceInput) != 0u) {
+ result += "i";
+ }
+ if ((fence_type & kFenceOutput) != 0u) {
+ result += "o";
+ }
+ if ((fence_type & kFenceRead) != 0u) {
+ result += "r";
+ }
+ if ((fence_type & kFenceWrite) != 0u) {
+ result += "w";
+ }
+ if (result.empty()) {
+ result += "0";
+ }
+ return result;
+ };
+
+ std::string expected;
+ // Note: The `pred` and `succ` are 4 bits each.
+ // Some combinations are not really useful but the assembler can emit them all.
+ for (uint32_t pred = 0u; pred != 0x10; ++pred) {
+ for (uint32_t succ = 0u; succ != 0x10; ++succ) {
+ __ Fence(pred, succ);
+ expected +=
+ "fence " + get_fence_type_string(pred) + ", " + get_fence_type_string(succ) + "\n";
+ }
+ }
+ DriverStr(expected, "Fence");
+}
+
+TEST_F(AssemblerRISCV64Test, FenceTso) {
+ __ FenceTso();
+ DriverStr("fence.tso", "FenceTso");
+}
+
+TEST_F(AssemblerRISCV64Test, FenceI) {
+ __ FenceI();
+ DriverStr("fence.i", "FenceI");
+}
+
+TEST_F(AssemblerRISCV64Test, Mul) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Mul, "mul {reg1}, {reg2}, {reg3}"), "Mul");
+}
+
+TEST_F(AssemblerRISCV64Test, Mulh) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Mulh, "mulh {reg1}, {reg2}, {reg3}"), "Mulh");
+}
+
+TEST_F(AssemblerRISCV64Test, Mulhsu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Mulhsu, "mulhsu {reg1}, {reg2}, {reg3}"), "Mulhsu");
+}
+
+TEST_F(AssemblerRISCV64Test, Mulhu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Mulhu, "mulhu {reg1}, {reg2}, {reg3}"), "Mulhu");
+}
+
+TEST_F(AssemblerRISCV64Test, Div) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Div, "div {reg1}, {reg2}, {reg3}"), "Div");
+}
+
+TEST_F(AssemblerRISCV64Test, Divu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Divu, "divu {reg1}, {reg2}, {reg3}"), "Divu");
+}
+
+TEST_F(AssemblerRISCV64Test, Rem) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Rem, "rem {reg1}, {reg2}, {reg3}"), "Rem");
+}
+
+TEST_F(AssemblerRISCV64Test, Remu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Remu, "remu {reg1}, {reg2}, {reg3}"), "Remu");
+}
+
+TEST_F(AssemblerRISCV64Test, Mulw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Mulw, "mulw {reg1}, {reg2}, {reg3}"), "Mulw");
+}
+
+TEST_F(AssemblerRISCV64Test, Divw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Divw, "divw {reg1}, {reg2}, {reg3}"), "Divw");
+}
+
+TEST_F(AssemblerRISCV64Test, Divuw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Divuw, "divuw {reg1}, {reg2}, {reg3}"), "Divuw");
+}
+
+TEST_F(AssemblerRISCV64Test, Remw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Remw, "remw {reg1}, {reg2}, {reg3}"), "Remw");
+}
+
+TEST_F(AssemblerRISCV64Test, Remuw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Remuw, "remuw {reg1}, {reg2}, {reg3}"), "Remuw");
+}
+
+TEST_F(AssemblerRISCV64Test, LrW) {
+ auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; };
+ DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrW, "lr.w{aqrl} {reg1}, ({reg2})", invalid_aqrl),
+ "LrW");
+}
+
+TEST_F(AssemblerRISCV64Test, LrD) {
+ auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; };
+ DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrD, "lr.d{aqrl} {reg1}, ({reg2})", invalid_aqrl),
+ "LrD");
+}
+
+TEST_F(AssemblerRISCV64Test, ScW) {
+ auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; };
+ DriverStr(
+ RepeatRRRAqRl(&Riscv64Assembler::ScW, "sc.w{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl),
+ "ScW");
+}
+
+TEST_F(AssemblerRISCV64Test, ScD) {
+ auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; };
+ DriverStr(
+ RepeatRRRAqRl(&Riscv64Assembler::ScD, "sc.d{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl),
+ "ScD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoSwapW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapW, "amoswap.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoSwapW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoSwapD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapD, "amoswap.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoSwapD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoAddW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddW, "amoadd.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoAddW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoAddD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddD, "amoadd.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoAddD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoXorW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorW, "amoxor.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoXorW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoXorD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorD, "amoxor.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoXorD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoAndW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndW, "amoand.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoAndW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoAndD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndD, "amoand.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoAndD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoOrW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrW, "amoor.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoOrW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoOrD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrD, "amoor.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoOrD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMinW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinW, "amomin.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMinW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMinD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinD, "amomin.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMinD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMaxW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxW, "amomax.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMaxW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMaxD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxD, "amomax.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMaxD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMinuW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuW, "amominu.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMinuW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMinuD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuD, "amominu.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMinuD");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMaxuW) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuW, "amomaxu.w{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMaxuW");
+}
+
+TEST_F(AssemblerRISCV64Test, AmoMaxuD) {
+ DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuD, "amomaxu.d{aqrl} {reg1}, {reg2}, ({reg3})"),
+ "AmoMaxuD");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrrw) {
+ DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrw, "csrrw {reg1}, {csr}, {reg2}"), "Csrrw");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrrs) {
+ DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrs, "csrrs {reg1}, {csr}, {reg2}"), "Csrrs");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrrc) {
+ DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrc, "csrrc {reg1}, {csr}, {reg2}"), "Csrrc");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrrwi) {
+ DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrwi, "csrrwi {reg}, {csr}, {uimm}"), "Csrrwi");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrrsi) {
+ DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrsi, "csrrsi {reg}, {csr}, {uimm}"), "Csrrsi");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrrci) {
+ DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrci, "csrrci {reg}, {csr}, {uimm}"), "Csrrci");
+}
+
+TEST_F(AssemblerRISCV64Test, FLw) {
+ DriverStr(RepeatFRIb(&Riscv64Assembler::FLw, -12, "flw {reg1}, {imm}({reg2})"), "FLw");
+}
+
+TEST_F(AssemblerRISCV64Test, FLd) {
+ DriverStr(RepeatFRIb(&Riscv64Assembler::FLd, -12, "fld {reg1}, {imm}({reg2})"), "FLd");
+}
+
+TEST_F(AssemblerRISCV64Test, FSw) {
+ DriverStr(RepeatFRIb(&Riscv64Assembler::FSw, 2, "fsw {reg1}, {imm}({reg2})"), "FSw");
+}
+
+TEST_F(AssemblerRISCV64Test, FSd) {
+ DriverStr(RepeatFRIb(&Riscv64Assembler::FSd, 2, "fsd {reg1}, {imm}({reg2})"), "FSd");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddS) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddS,
+ "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddS_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddS, "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FMAddS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddD) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddD,
+ "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMAddD_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddD, "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FMAddD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubS) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubS,
+ "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubS_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubS, "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FMSubS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubD) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubD,
+ "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMSubD_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubD, "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FMSubD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubS) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubS,
+ "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubS");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubS_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubS, "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FNMSubS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubD) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubD,
+ "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubD");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMSubD_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubD, "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FNMSubD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddS) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddS,
+ "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddS");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddS_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddS, "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FNMAddS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddD) {
+ DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddD,
+ "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddD");
+}
+
+TEST_F(AssemblerRISCV64Test, FNMAddD_Default) {
+ DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddD, "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}"),
+ "FNMAddD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FAddS) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}, {rm}"),
+ "FAddS");
+}
+
+TEST_F(AssemblerRISCV64Test, FAddS_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}"), "FAddS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FAddD) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}, {rm}"),
+ "FAddD");
+}
+
+TEST_F(AssemblerRISCV64Test, FAddD_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}"), "FAddD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FSubS) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}, {rm}"),
+ "FSubS");
+}
+
+TEST_F(AssemblerRISCV64Test, FSubS_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}"), "FSubS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FSubD) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}, {rm}"),
+ "FSubD");
+}
+
+TEST_F(AssemblerRISCV64Test, FSubD_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}"), "FSubD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMulS) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}, {rm}"),
+ "FMulS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMulS_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}"), "FMulS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMulD) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}, {rm}"),
+ "FMulD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMulD_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}"), "FMulD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FDivS) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}, {rm}"),
+ "FDivS");
+}
+
+TEST_F(AssemblerRISCV64Test, FDivS_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}"), "FDivS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FDivD) {
+ DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}, {rm}"),
+ "FDivD");
+}
+
+TEST_F(AssemblerRISCV64Test, FDivD_Default) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}"), "FDivD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FSqrtS) {
+ DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}, {rm}"),
+ "FSqrtS");
+}
+
+TEST_F(AssemblerRISCV64Test, FSqrtS_Default) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}"), "FSqrtS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FSqrtD) {
+ DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}, {rm}"),
+ "FSqrtD");
+}
+
+TEST_F(AssemblerRISCV64Test, FSqrtD_Default) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}"), "FSqrtD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FSgnjS) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjS, "fsgnj.s {reg1}, {reg2}, {reg3}"), "FSgnjS");
+}
+
+TEST_F(AssemblerRISCV64Test, FSgnjD) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjD, "fsgnj.d {reg1}, {reg2}, {reg3}"), "FSgnjD");
+}
+
+TEST_F(AssemblerRISCV64Test, FSgnjnS) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnS, "fsgnjn.s {reg1}, {reg2}, {reg3}"), "FSgnjnS");
+}
+
+TEST_F(AssemblerRISCV64Test, FSgnjnD) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnD, "fsgnjn.d {reg1}, {reg2}, {reg3}"), "FSgnjnD");
+}
+
+TEST_F(AssemblerRISCV64Test, FSgnjxS) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxS, "fsgnjx.s {reg1}, {reg2}, {reg3}"), "FSgnjxS");
+}
+
+TEST_F(AssemblerRISCV64Test, FSgnjxD) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxD, "fsgnjx.d {reg1}, {reg2}, {reg3}"), "FSgnjxD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMinS) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FMinS, "fmin.s {reg1}, {reg2}, {reg3}"), "FMinS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMinD) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FMinD, "fmin.d {reg1}, {reg2}, {reg3}"), "FMinD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMaxS) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FMaxS, "fmax.s {reg1}, {reg2}, {reg3}"), "FMaxS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMaxD) {
+ DriverStr(RepeatFFF(&Riscv64Assembler::FMaxD, "fmax.d {reg1}, {reg2}, {reg3}"), "FMaxD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSD) {
+ DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}, {rm}"),
+ "FCvtSD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSD_Default) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}"), "FCvtSD_Default");
+}
+
+// This conversion is lossless, so the rounding mode is meaningless. The assembler we're
+// testing against does not even accept the rounding mode argument, so this test is disabled.
+TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDS) {
+ DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}, {rm}"),
+ "FCvtDS");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDS_Default) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}"), "FCvtDS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FEqS) {
+ DriverStr(RepeatRFF(&Riscv64Assembler::FEqS, "feq.s {reg1}, {reg2}, {reg3}"), "FEqS");
+}
+
+TEST_F(AssemblerRISCV64Test, FEqD) {
+ DriverStr(RepeatRFF(&Riscv64Assembler::FEqD, "feq.d {reg1}, {reg2}, {reg3}"), "FEqD");
+}
+
+TEST_F(AssemblerRISCV64Test, FLtS) {
+ DriverStr(RepeatRFF(&Riscv64Assembler::FLtS, "flt.s {reg1}, {reg2}, {reg3}"), "FLtS");
+}
+
+TEST_F(AssemblerRISCV64Test, FLtD) {
+ DriverStr(RepeatRFF(&Riscv64Assembler::FLtD, "flt.d {reg1}, {reg2}, {reg3}"), "FLtD");
+}
+
+TEST_F(AssemblerRISCV64Test, FLeS) {
+ DriverStr(RepeatRFF(&Riscv64Assembler::FLeS, "fle.s {reg1}, {reg2}, {reg3}"), "FLeS");
+}
+
+TEST_F(AssemblerRISCV64Test, FLeD) {
+ DriverStr(RepeatRFF(&Riscv64Assembler::FLeD, "fle.d {reg1}, {reg2}, {reg3}"), "FLeD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWS) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}, {rm}"),
+ "FCvtWS");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWS_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}"), "FCvtWS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWD) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}, {rm}"),
+ "FCvtWD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWD_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}"), "FCvtWD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWuS) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}, {rm}"),
+ "FCvtWuS");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWuS_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}"), "FCvtWuS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWuD) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}, {rm}"),
+ "FCvtWuD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtWuD_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}"), "FCvtWuD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLS) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}, {rm}"),
+ "FCvtLS");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLS_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}"), "FCvtLS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLD) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}, {rm}"),
+ "FCvtLD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLD_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}"), "FCvtLD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLuS) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}, {rm}"),
+ "FCvtLuS");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLuS_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}"), "FCvtLuS_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLuD) {
+ DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}, {rm}"),
+ "FCvtLuD");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtLuD_Default) {
+ DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}"), "FCvtLuD_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSW) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}, {rm}"),
+ "FCvtSW");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSW_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}"), "FCvtSW_Default");
+}
+
+// This conversion is lossless, so the rounding mode is meaningless. The assembler we're
+// testing against does not even accept the rounding mode argument, so this test is disabled.
+TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDW) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}, {rm}"),
+ "FCvtDW");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDW_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}"), "FCvtDW_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSWu) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}, {rm}"),
+ "FCvtSWu");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSWu_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}"), "FCvtSWu_Default");
+}
+
+// This conversion is lossless, so the rounding mode is meaningless. The assembler we're
+// testing against does not even accept the rounding mode argument, so this test is disabled.
+TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDWu) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}, {rm}"),
+ "FCvtDWu");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDWu_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}"), "FCvtDWu_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSL) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}, {rm}"),
+ "FCvtSL");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSL_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}"), "FCvtSL_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDL) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}, {rm}"),
+ "FCvtDL");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDL_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}"), "FCvtDL_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSLu) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}, {rm}"),
+ "FCvtSLu");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtSLu_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}"), "FCvtSLu_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDLu) {
+ DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}, {rm}"),
+ "FCvtDLu");
+}
+
+TEST_F(AssemblerRISCV64Test, FCvtDLu_Default) {
+ DriverStr(RepeatFr(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}"), "FCvtDLu_Default");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvXW) {
+ DriverStr(RepeatRF(&Riscv64Assembler::FMvXW, "fmv.x.w {reg1}, {reg2}"), "FMvXW");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvXD) {
+ DriverStr(RepeatRF(&Riscv64Assembler::FMvXD, "fmv.x.d {reg1}, {reg2}"), "FMvXD");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvWX) {
+ DriverStr(RepeatFR(&Riscv64Assembler::FMvWX, "fmv.w.x {reg1}, {reg2}"), "FMvWX");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvDX) {
+ DriverStr(RepeatFR(&Riscv64Assembler::FMvDX, "fmv.d.x {reg1}, {reg2}"), "FMvDX");
+}
+
+TEST_F(AssemblerRISCV64Test, FClassS) {
+ DriverStr(RepeatRF(&Riscv64Assembler::FClassS, "fclass.s {reg1}, {reg2}"), "FClassS");
+}
+
+TEST_F(AssemblerRISCV64Test, FClassD) {
+ DriverStr(RepeatRF(&Riscv64Assembler::FClassD, "fclass.d {reg1}, {reg2}"), "FClassD");
+}
+
+TEST_F(AssemblerRISCV64Test, AddUw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::AddUw, "add.uw {reg1}, {reg2}, {reg3}"), "AddUw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh1Add) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sh1Add, "sh1add {reg1}, {reg2}, {reg3}"), "Sh1Add");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh1AddUw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sh1AddUw, "sh1add.uw {reg1}, {reg2}, {reg3}"), "Sh1AddUw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh2Add) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sh2Add, "sh2add {reg1}, {reg2}, {reg3}"), "Sh2Add");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh2AddUw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sh2AddUw, "sh2add.uw {reg1}, {reg2}, {reg3}"), "Sh2AddUw");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh3Add) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sh3Add, "sh3add {reg1}, {reg2}, {reg3}"), "Sh3Add");
+}
+
+TEST_F(AssemblerRISCV64Test, Sh3AddUw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Sh3AddUw, "sh3add.uw {reg1}, {reg2}, {reg3}"), "Sh3AddUw");
+}
+
+TEST_F(AssemblerRISCV64Test, SlliUw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::SlliUw, 6, "slli.uw {reg1}, {reg2}, {imm}"), "SlliUw");
+}
+
+TEST_F(AssemblerRISCV64Test, Andn) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Andn, "andn {reg1}, {reg2}, {reg3}"), "Andn");
+}
+
+TEST_F(AssemblerRISCV64Test, Orn) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Orn, "orn {reg1}, {reg2}, {reg3}"), "Orn");
+}
+
+TEST_F(AssemblerRISCV64Test, Xnor) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Xnor, "xnor {reg1}, {reg2}, {reg3}"), "Xnor");
+}
+
+TEST_F(AssemblerRISCV64Test, Clz) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Clz, "clz {reg1}, {reg2}"), "Clz");
+}
+
+TEST_F(AssemblerRISCV64Test, Clzw) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Clzw, "clzw {reg1}, {reg2}"), "Clzw");
+}
+
+TEST_F(AssemblerRISCV64Test, Ctz) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Ctz, "ctz {reg1}, {reg2}"), "Ctz");
+}
+
+TEST_F(AssemblerRISCV64Test, Ctzw) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Ctzw, "ctzw {reg1}, {reg2}"), "Ctzw");
+}
+
+TEST_F(AssemblerRISCV64Test, Cpop) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Cpop, "cpop {reg1}, {reg2}"), "Cpop");
+}
+
+TEST_F(AssemblerRISCV64Test, Cpopw) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Cpopw, "cpopw {reg1}, {reg2}"), "Cpopw");
+}
+
+TEST_F(AssemblerRISCV64Test, Min) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Min, "min {reg1}, {reg2}, {reg3}"), "Min");
+}
+
+TEST_F(AssemblerRISCV64Test, Minu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Minu, "minu {reg1}, {reg2}, {reg3}"), "Minu");
+}
+
+TEST_F(AssemblerRISCV64Test, Max) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Max, "max {reg1}, {reg2}, {reg3}"), "Max");
+}
+
+TEST_F(AssemblerRISCV64Test, Maxu) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Maxu, "maxu {reg1}, {reg2}, {reg3}"), "Maxu");
+}
+
+TEST_F(AssemblerRISCV64Test, Rol) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Rol, "rol {reg1}, {reg2}, {reg3}"), "Rol");
+}
+
+TEST_F(AssemblerRISCV64Test, Rolw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Rolw, "rolw {reg1}, {reg2}, {reg3}"), "Rolw");
+}
+
+TEST_F(AssemblerRISCV64Test, Ror) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Ror, "ror {reg1}, {reg2}, {reg3}"), "Ror");
+}
+
+TEST_F(AssemblerRISCV64Test, Rorw) {
+ DriverStr(RepeatRRR(&Riscv64Assembler::Rorw, "rorw {reg1}, {reg2}, {reg3}"), "Rorw");
+}
+
+TEST_F(AssemblerRISCV64Test, Rori) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Rori, 6, "rori {reg1}, {reg2}, {imm}"), "Rori");
+}
+
+TEST_F(AssemblerRISCV64Test, Roriw) {
+ DriverStr(RepeatRRIb(&Riscv64Assembler::Roriw, 5, "roriw {reg1}, {reg2}, {imm}"), "Roriw");
+}
+
+TEST_F(AssemblerRISCV64Test, OrcB) {
+ DriverStr(RepeatRR(&Riscv64Assembler::OrcB, "orc.b {reg1}, {reg2}"), "OrcB");
+}
+
+TEST_F(AssemblerRISCV64Test, Rev8) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Rev8, "rev8 {reg1}, {reg2}"), "Rev8");
+}
+
+// Pseudo instructions.
+TEST_F(AssemblerRISCV64Test, Nop) {
+ __ Nop();
+ DriverStr("addi zero,zero,0", "Nop");
+}
+
+TEST_F(AssemblerRISCV64Test, Li) {
+ SetUseSimpleMarch(true);
+ TestLoadConst64("Li",
+ /*can_use_tmp=*/ false,
+ [&](XRegister rd, int64_t value) { __ Li(rd, value); });
+}
+
+TEST_F(AssemblerRISCV64Test, Mv) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Mv, "addi {reg1}, {reg2}, 0"), "Mv");
+}
+
+TEST_F(AssemblerRISCV64Test, Not) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Not, "xori {reg1}, {reg2}, -1"), "Not");
+}
+
+TEST_F(AssemblerRISCV64Test, Neg) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Neg, "sub {reg1}, x0, {reg2}"), "Neg");
+}
+
+TEST_F(AssemblerRISCV64Test, NegW) {
+ DriverStr(RepeatRR(&Riscv64Assembler::NegW, "subw {reg1}, x0, {reg2}"), "NegW");
+}
+
+TEST_F(AssemblerRISCV64Test, SextB) {
+ // Note: SEXT.B from the Zbb extension is not supported.
+ DriverStr(RepeatRR(&Riscv64Assembler::SextB,
+ "slli {reg1}, {reg2}, 56\n"
+ "srai {reg1}, {reg1}, 56"),
+ "SextB");
+}
+
+TEST_F(AssemblerRISCV64Test, SextH) {
+ // Note: SEXT.H from the Zbb extension is not supported.
+ DriverStr(RepeatRR(&Riscv64Assembler::SextH,
+ "slli {reg1}, {reg2}, 48\n"
+ "srai {reg1}, {reg1}, 48"),
+ "SextH");
+}
+
+TEST_F(AssemblerRISCV64Test, SextW) {
+ DriverStr(RepeatRR(&Riscv64Assembler::SextW, "addiw {reg1}, {reg2}, 0\n"), "SextW");
+}
+
+TEST_F(AssemblerRISCV64Test, ZextB) {
+ DriverStr(RepeatRR(&Riscv64Assembler::ZextB, "andi {reg1}, {reg2}, 255"), "ZextB");
+}
+
+TEST_F(AssemblerRISCV64Test, ZextH) {
+ // Note: ZEXT.H from the Zbb extension is not supported.
+ DriverStr(RepeatRR(&Riscv64Assembler::ZextH,
+ "slli {reg1}, {reg2}, 48\n"
+ "srli {reg1}, {reg1}, 48"),
+ "SextH");
+}
+
+TEST_F(AssemblerRISCV64Test, ZextW) {
+ DriverStr(RepeatRR(&Riscv64Assembler::ZextW,
+ "slli {reg1}, {reg2}, 32\n"
+ "srli {reg1}, {reg1}, 32"),
+ "ZextW");
+}
+
+TEST_F(AssemblerRISCV64Test, Seqz) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Seqz, "sltiu {reg1}, {reg2}, 1\n"), "Seqz");
+}
+
+TEST_F(AssemblerRISCV64Test, Snez) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Snez, "sltu {reg1}, zero, {reg2}\n"), "Snez");
+}
+
+TEST_F(AssemblerRISCV64Test, Sltz) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Sltz, "slt {reg1}, {reg2}, zero\n"), "Sltz");
+}
+
+TEST_F(AssemblerRISCV64Test, Sgtz) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Sgtz, "slt {reg1}, zero, {reg2}\n"), "Sgtz");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvS) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FMvS, "fsgnj.s {reg1}, {reg2}, {reg2}\n"), "FMvS");
+}
+
+TEST_F(AssemblerRISCV64Test, FAbsS) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FAbsS, "fsgnjx.s {reg1}, {reg2}, {reg2}\n"), "FAbsS");
+}
+
+TEST_F(AssemblerRISCV64Test, FNegS) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FNegS, "fsgnjn.s {reg1}, {reg2}, {reg2}\n"), "FNegS");
+}
+
+TEST_F(AssemblerRISCV64Test, FMvD) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FMvD, "fsgnj.d {reg1}, {reg2}, {reg2}\n"), "FMvD");
+}
+
+TEST_F(AssemblerRISCV64Test, FAbsD) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FAbsD, "fsgnjx.d {reg1}, {reg2}, {reg2}\n"), "FAbsD");
+}
+
+TEST_F(AssemblerRISCV64Test, FNegD) {
+ DriverStr(RepeatFF(&Riscv64Assembler::FNegD, "fsgnjn.d {reg1}, {reg2}, {reg2}\n"), "FNegD");
+}
+
+TEST_F(AssemblerRISCV64Test, Beqz) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Beqz, -11, 2, "beq {reg}, zero, {imm}\n"), "Beqz");
+}
+
+TEST_F(AssemblerRISCV64Test, Bnez) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Bnez, -11, 2, "bne {reg}, zero, {imm}\n"), "Bnez");
+}
+
+TEST_F(AssemblerRISCV64Test, Blez) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Blez, -11, 2, "bge zero, {reg}, {imm}\n"), "Blez");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgez) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Bgez, -11, 2, "bge {reg}, zero, {imm}\n"), "Bgez");
+}
+
+TEST_F(AssemblerRISCV64Test, Bltz) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Bltz, -11, 2, "blt {reg}, zero, {imm}\n"), "Bltz");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgtz) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Bgtz, -11, 2, "blt zero, {reg}, {imm}\n"), "Bgtz");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgt) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgt, -11, 2, "blt {reg2}, {reg1}, {imm}\n"), "Bgt");
+}
+
+TEST_F(AssemblerRISCV64Test, Ble) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Ble, -11, 2, "bge {reg2}, {reg1}, {imm}\n"), "Ble");
+}
+
+TEST_F(AssemblerRISCV64Test, Bgtu) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgtu, -11, 2, "bltu {reg2}, {reg1}, {imm}\n"), "Bgtu");
+}
+
+TEST_F(AssemblerRISCV64Test, Bleu) {
+ // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension.
+ DriverStr(RepeatRRIbS(&Riscv64Assembler::Bleu, -11, 2, "bgeu {reg2}, {reg1}, {imm}\n"), "Bleu");
+}
+
+TEST_F(AssemblerRISCV64Test, J) {
+ // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension.
+ DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::J, -19, 2, "j {imm}\n"), "J");
+}
+
+TEST_F(AssemblerRISCV64Test, JalRA) {
+ // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension.
+ DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::Jal, -19, 2, "jal {imm}\n"), "JalRA");
+}
+
+TEST_F(AssemblerRISCV64Test, Jr) {
+ DriverStr(RepeatR(&Riscv64Assembler::Jr, "jr {reg}\n"), "Jr");
+}
+
+TEST_F(AssemblerRISCV64Test, JalrRA) {
+ DriverStr(RepeatR(&Riscv64Assembler::Jalr, "jalr {reg}\n"), "JalrRA");
+}
+
+TEST_F(AssemblerRISCV64Test, Jalr0) {
+ DriverStr(RepeatRR(&Riscv64Assembler::Jalr, "jalr {reg1}, {reg2}\n"), "Jalr0");
+}
+
+TEST_F(AssemblerRISCV64Test, Ret) {
+ __ Ret();
+ DriverStr("ret\n", "Ret");
+}
+
+TEST_F(AssemblerRISCV64Test, RdCycle) {
+ DriverStr(RepeatR(&Riscv64Assembler::RdCycle, "rdcycle {reg}\n"), "RdCycle");
+}
+
+TEST_F(AssemblerRISCV64Test, RdTime) {
+ DriverStr(RepeatR(&Riscv64Assembler::RdTime, "rdtime {reg}\n"), "RdTime");
+}
+
+TEST_F(AssemblerRISCV64Test, RdInstret) {
+ DriverStr(RepeatR(&Riscv64Assembler::RdInstret, "rdinstret {reg}\n"), "RdInstret");
+}
+
+TEST_F(AssemblerRISCV64Test, Csrr) {
+ TestCsrrXMacro(
+ "Csrr", "csrr {reg}, {csr}", [&](uint32_t csr, XRegister rd) { __ Csrr(rd, csr); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrw) {
+ TestCsrrXMacro(
+ "Csrw", "csrw {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrw(csr, rs); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrs) {
+ TestCsrrXMacro(
+ "Csrs", "csrs {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrs(csr, rs); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrc) {
+ TestCsrrXMacro(
+ "Csrc", "csrc {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrc(csr, rs); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrwi) {
+ TestCsrrXiMacro(
+ "Csrwi", "csrwi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrwi(csr, uimm); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrsi) {
+ TestCsrrXiMacro(
+ "Csrsi", "csrsi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrsi(csr, uimm); });
+}
+
+TEST_F(AssemblerRISCV64Test, Csrci) {
+ TestCsrrXiMacro(
+ "Csrci", "csrci {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrci(csr, uimm); });
+}
+
+TEST_F(AssemblerRISCV64Test, LoadConst32) {
+ // `LoadConst32()` emits the same code sequences as `Li()` for 32-bit values.
+ ScratchRegisterScope srs(GetAssembler());
+ srs.ExcludeXRegister(TMP);
+ srs.ExcludeXRegister(TMP2);
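+ // With both scratch registers excluded, this also checks that `LoadConst32()` can emit
+ // any 32-bit constant without allocating a scratch register.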
+ DriverStr(RepeatRIb(&Riscv64Assembler::LoadConst32, -32, "li {reg}, {imm}"), "LoadConst32");
+}
+
+TEST_F(AssemblerRISCV64Test, LoadConst64) {
+ SetUseSimpleMarch(true);
+ TestLoadConst64("LoadConst64",
+ /*can_use_tmp=*/ true,
+ [&](XRegister rd, int64_t value) { __ LoadConst64(rd, value); });
+}
+
+TEST_F(AssemblerRISCV64Test, AddConst32) {
+ auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) {
+ __ AddConst32(rd, rs1, dchecked_integral_cast<int32_t>(value));
+ };
+ TestAddConst("AddConst32", 32, /*suffix=*/ "w", emit_op);
+}
+
+TEST_F(AssemblerRISCV64Test, AddConst64) {
+ SetUseSimpleMarch(true);
+ auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) {
+ __ AddConst64(rd, rs1, value);
+ };
+ TestAddConst("AddConst64", 64, /*suffix=*/ "", emit_op);
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward3KiB) {
+ TestBcondForward("BcondForward3KiB", 3 * KB, "1", GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward3KiB) {
+ TestBcondBackward("BcondBackward3KiB", 3 * KB, "1", GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward5KiB) {
+ TestBcondForward("BcondForward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward5KiB) {
+ TestBcondBackward("BcondBackward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondForward2MiB) {
+ TestBcondForward("BcondForward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondBackward2MiB) {
+ TestBcondBackward("BcondBackward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Forward) {
+ TestBeqA0A1Forward("BeqA0A1MaxOffset13Forward",
+ MaxOffset13ForwardDistance() - /*BEQ*/ 4u,
+ "1",
+ GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Backward) {
+ TestBeqA0A1Backward("BeqA0A1MaxOffset13Forward",
+ MaxOffset13BackwardDistance(),
+ "1",
+ GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Forward) {
+ TestBeqA0A1Forward("BeqA0A1OverMaxOffset13Forward",
+ MaxOffset13ForwardDistance() - /*BEQ*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Backward) {
+ TestBeqA0A1Backward("BeqA0A1OverMaxOffset13Forward",
+ MaxOffset13BackwardDistance() + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Forward) {
+ TestBeqA0A1Forward("BeqA0A1MaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Backward) {
+ TestBeqA0A1Backward("BeqA0A1MaxOffset21Backward",
+ MaxOffset21BackwardDistance() - /*BNE*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndJ("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Forward) {
+ TestBeqA0A1Forward("BeqA0A1OverMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Backward) {
+ TestBeqA0A1Backward("BeqA0A1OverMaxOffset21Backward",
+ MaxOffset21BackwardDistance() - /*BNE*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetPrintBcondOppositeAndTail("2", "3"));
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1AlmostCascade) {
+ TestBeqA0A1MaybeCascade("BeqA0A1AlmostCascade", /*cascade=*/ false, GetPrintBcond());
+}
+
+TEST_F(AssemblerRISCV64Test, BeqA0A1Cascade) {
+ TestBeqA0A1MaybeCascade(
+ "BeqA0A1AlmostCascade", /*cascade=*/ true, GetPrintBcondOppositeAndJ("1"));
+}
+
+TEST_F(AssemblerRISCV64Test, BcondElimination) {
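+ // Conditional branches that can never be taken (both operands are the same register)
+ // are expected to be eliminated, so only the `nop` should remain in the output.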
+ Riscv64Label label;
+ __ Bind(&label);
+ __ Nop();
+ for (XRegister reg : GetRegisters()) {
+ __ Bne(reg, reg, &label);
+ __ Blt(reg, reg, &label);
+ __ Bgt(reg, reg, &label);
+ __ Bltu(reg, reg, &label);
+ __ Bgtu(reg, reg, &label);
+ }
+ DriverStr("nop\n", "BcondElimination");
+}
+
+TEST_F(AssemblerRISCV64Test, BcondUnconditional) {
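+ // Conditional branches that are always taken (both operands are the same register)
+ // are expected to be emitted as unconditional jumps to the bound label.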
+ Riscv64Label label;
+ __ Bind(&label);
+ __ Nop();
+ for (XRegister reg : GetRegisters()) {
+ __ Beq(reg, reg, &label);
+ __ Bge(reg, reg, &label);
+ __ Ble(reg, reg, &label);
+ __ Bleu(reg, reg, &label);
+ __ Bgeu(reg, reg, &label);
+ }
+ std::string expected =
+ "1:\n"
+ "nop\n" +
+ RepeatInsn(5u * GetRegisters().size(), "j 1b\n", []() {});
+ DriverStr(expected, "BcondUnconditional");
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward3KiB) {
+ TestJalRdForward("JalRdForward3KiB", 3 * KB, "1", GetPrintJalRd());
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward3KiB) {
+ TestJalRdBackward("JalRdBackward3KiB", 3 * KB, "1", GetPrintJalRd());
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdForward2MiB) {
+ TestJalRdForward("JalRdForward2MiB", 2 * MB, "1", GetPrintCallRd("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JalRdBackward2MiB) {
+ TestJalRdBackward("JalRdBackward2MiB", 2 * MB, "1", GetPrintCallRd("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JForward3KiB) {
+ TestBuncondForward("JForward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward3KiB) {
+ TestBuncondBackward("JBackward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JForward2MiB) {
+ TestBuncondForward("JForward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JBackward2MiB) {
+ TestBuncondBackward("JBackward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21Forward) {
+ TestBuncondForward("JMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u,
+ "1",
+ GetEmitJ(),
+ GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JMaxOffset21Backward) {
+ TestBuncondBackward("JMaxOffset21Backward",
+ MaxOffset21BackwardDistance(),
+ "1",
+ GetEmitJ(),
+ GetPrintJ());
+}
+
+TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Forward) {
+ TestBuncondForward("JOverMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJ(),
+ GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Backward) {
+ TestBuncondBackward("JMaxOffset21Backward",
+ MaxOffset21BackwardDistance() + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJ(),
+ GetPrintTail("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallForward3KiB) {
+ TestBuncondForward("CallForward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallBackward3KiB) {
+ TestBuncondBackward("CallBackward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallForward2MiB) {
+ TestBuncondForward("CallForward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallBackward2MiB) {
+ TestBuncondBackward("CallBackward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Forward) {
+ TestBuncondForward("CallMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u,
+ "1",
+ GetEmitJal(),
+ GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Backward) {
+ TestBuncondBackward("CallMaxOffset21Backward",
+ MaxOffset21BackwardDistance(),
+ "1",
+ GetEmitJal(),
+ GetPrintJal());
+}
+
+TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Forward) {
+ TestBuncondForward("CallOverMaxOffset21Forward",
+ MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJal(),
+ GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Backward) {
+ TestBuncondBackward("CallMaxOffset21Backward",
+ MaxOffset21BackwardDistance() + /*Exceed max*/ 4u,
+ "1",
+ GetEmitJal(),
+ GetPrintCall("2"));
+}
+
+TEST_F(AssemblerRISCV64Test, Loadb) {
+ TestLoadStoreArbitraryOffset("Loadb", "lb", &Riscv64Assembler::Loadb, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadh) {
+ TestLoadStoreArbitraryOffset("Loadh", "lh", &Riscv64Assembler::Loadh, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadw) {
+ TestLoadStoreArbitraryOffset("Loadw", "lw", &Riscv64Assembler::Loadw, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadd) {
+ TestLoadStoreArbitraryOffset("Loadd", "ld", &Riscv64Assembler::Loadd, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadbu) {
+ TestLoadStoreArbitraryOffset("Loadbu", "lbu", &Riscv64Assembler::Loadbu, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadhu) {
+ TestLoadStoreArbitraryOffset("Loadhu", "lhu", &Riscv64Assembler::Loadhu, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Loadwu) {
+ TestLoadStoreArbitraryOffset("Loadwu", "lwu", &Riscv64Assembler::Loadwu, /*is_store=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, Storeb) {
+ TestLoadStoreArbitraryOffset("Storeb", "sb", &Riscv64Assembler::Storeb, /*is_store=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, Storeh) {
+ TestLoadStoreArbitraryOffset("Storeh", "sh", &Riscv64Assembler::Storeh, /*is_store=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, Storew) {
+ TestLoadStoreArbitraryOffset("Storew", "sw", &Riscv64Assembler::Storew, /*is_store=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, Stored) {
+ TestLoadStoreArbitraryOffset("Stored", "sd", &Riscv64Assembler::Stored, /*is_store=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, FLoadw) {
+ TestFPLoadStoreArbitraryOffset("FLoadw", "flw", &Riscv64Assembler::FLoadw);
+}
+
+TEST_F(AssemblerRISCV64Test, FLoadd) {
+ TestFPLoadStoreArbitraryOffset("FLoadd", "fld", &Riscv64Assembler::FLoadd);
+}
+
+TEST_F(AssemblerRISCV64Test, FStorew) {
+ TestFPLoadStoreArbitraryOffset("FStorew", "fsw", &Riscv64Assembler::FStorew);
+}
+
+TEST_F(AssemblerRISCV64Test, FStored) {
+ TestFPLoadStoreArbitraryOffset("FStored", "fsd", &Riscv64Assembler::FStored);
+}
+
+TEST_F(AssemblerRISCV64Test, Unimp) {
+ __ Unimp();
+ DriverStr("unimp\n", "Unimp");
+}
+
+TEST_F(AssemblerRISCV64Test, LoadLabelAddress) {
+ std::string expected;
+ constexpr size_t kNumLoadsForward = 4 * KB;
+ constexpr size_t kNumLoadsBackward = 4 * KB;
+ Riscv64Label label;
+ auto emit_batch = [&](size_t num_loads, const std::string& target_label) {
+ for (size_t i = 0; i != num_loads; ++i) {
+ // Cycle through non-Zero registers.
+ XRegister rd = enum_cast<XRegister>((i % (kNumberOfXRegisters - 1)) + 1);
+ DCHECK_NE(rd, Zero);
+ std::string rd_name = GetRegisterName(rd);
+ __ LoadLabelAddress(rd, &label);
+ expected += "1:\n";
+ expected += ART_FORMAT("auipc {}, %pcrel_hi({})\n", rd_name, target_label);
+ expected += ART_FORMAT("addi {}, {}, %pcrel_lo(1b)\n", rd_name, rd_name);
+ }
+ };
+ emit_batch(kNumLoadsForward, "2f");
+ __ Bind(&label);
+ expected += "2:\n";
+ emit_batch(kNumLoadsBackward, "2b");
+ DriverStr(expected, "LoadLabelAddress");
+}
+
+TEST_F(AssemblerRISCV64Test, LoadLiteralWithPaddingForLong) {
+ TestLoadLiteral("LoadLiteralWithPaddingForLong", /*with_padding_for_long=*/ true);
+}
+
+TEST_F(AssemblerRISCV64Test, LoadLiteralWithoutPaddingForLong) {
+ TestLoadLiteral("LoadLiteralWithoutPaddingForLong", /*with_padding_for_long=*/ false);
+}
+
+TEST_F(AssemblerRISCV64Test, JumpTable) {
+ std::string expected;
+ expected += EmitNops(sizeof(uint32_t));
+ Riscv64Label targets[4];
+ uint32_t target_locations[4];
+ JumpTable* jump_table = __ CreateJumpTable(ArenaVector<Riscv64Label*>(
+ {&targets[0], &targets[1], &targets[2], &targets[3]}, __ GetAllocator()->Adapter()));
+ for (size_t i : {0, 1, 2, 3}) {
+ target_locations[i] = __ CodeSize();
+ __ Bind(&targets[i]);
+ expected += std::to_string(i) + ":\n";
+ expected += EmitNops(sizeof(uint32_t));
+ }
+ __ LoadLabelAddress(A0, jump_table->GetLabel());
+ expected += "4:\n"
+ "auipc a0, %pcrel_hi(5f)\n"
+ "addi a0, a0, %pcrel_lo(4b)\n";
+ expected += EmitNops(sizeof(uint32_t));
+ uint32_t label5_location = __ CodeSize();
+ auto target_offset = [&](size_t i) {
+ // Even with `-mno-relax`, clang assembler does not fully resolve `.4byte 0b - 5b`
+ // and emits a relocation, so we need to calculate target offsets ourselves.
+ return std::to_string(static_cast<int64_t>(target_locations[i] - label5_location));
+ };
+ expected += "5:\n"
+ ".4byte " + target_offset(0) + "\n"
+ ".4byte " + target_offset(1) + "\n"
+ ".4byte " + target_offset(2) + "\n"
+ ".4byte " + target_offset(3) + "\n";
+ DriverStr(expected, "JumpTable");
+}
+
+TEST_F(AssemblerRISCV64Test, ScratchRegisters) {
+ ScratchRegisterScope srs(GetAssembler());
+ ASSERT_EQ(2u, srs.AvailableXRegisters()); // Default: TMP(T6) and TMP2(T5).
+ ASSERT_EQ(1u, srs.AvailableFRegisters()); // Default: FTMP(FT11).
+
+ XRegister tmp = srs.AllocateXRegister();
+ EXPECT_EQ(TMP, tmp);
+ XRegister tmp2 = srs.AllocateXRegister();
+ EXPECT_EQ(TMP2, tmp2);
+ ASSERT_EQ(0u, srs.AvailableXRegisters());
+
+ FRegister ftmp = srs.AllocateFRegister();
+ EXPECT_EQ(FTMP, ftmp);
+ ASSERT_EQ(0u, srs.AvailableFRegisters());
+
+ // Test nesting.
+ srs.FreeXRegister(A0);
+ srs.FreeXRegister(A1);
+ srs.FreeFRegister(FA0);
+ srs.FreeFRegister(FA1);
+ ASSERT_EQ(2u, srs.AvailableXRegisters());
+ ASSERT_EQ(2u, srs.AvailableFRegisters());
+ {
+ ScratchRegisterScope srs2(GetAssembler());
+ ASSERT_EQ(2u, srs2.AvailableXRegisters());
+ ASSERT_EQ(2u, srs2.AvailableFRegisters());
+ XRegister a1 = srs2.AllocateXRegister();
+ EXPECT_EQ(A1, a1);
+ XRegister a0 = srs2.AllocateXRegister();
+ EXPECT_EQ(A0, a0);
+ ASSERT_EQ(0u, srs2.AvailableXRegisters());
+ FRegister fa1 = srs2.AllocateFRegister();
+ EXPECT_EQ(FA1, fa1);
+ FRegister fa0 = srs2.AllocateFRegister();
+ EXPECT_EQ(FA0, fa0);
+ ASSERT_EQ(0u, srs2.AvailableFRegisters());
+ }
+ ASSERT_EQ(2u, srs.AvailableXRegisters());
+ ASSERT_EQ(2u, srs.AvailableFRegisters());
+
+ srs.IncludeXRegister(A0); // No-op as the register was already available.
+ ASSERT_EQ(2u, srs.AvailableXRegisters());
+ srs.IncludeFRegister(FA0); // No-op as the register was already available.
+ ASSERT_EQ(2u, srs.AvailableFRegisters());
+ srs.IncludeXRegister(S0);
+ ASSERT_EQ(3u, srs.AvailableXRegisters());
+ srs.IncludeFRegister(FS0);
+ ASSERT_EQ(3u, srs.AvailableFRegisters());
+
+ srs.ExcludeXRegister(S1); // No-op as the register was not available.
+ ASSERT_EQ(3u, srs.AvailableXRegisters());
+ srs.ExcludeFRegister(FS1); // No-op as the register was not available.
+ ASSERT_EQ(3u, srs.AvailableFRegisters());
+ srs.ExcludeXRegister(A0);
+ ASSERT_EQ(2u, srs.AvailableXRegisters());
+ srs.ExcludeFRegister(FA0);
+ ASSERT_EQ(2u, srs.AvailableFRegisters());
+}
+
+#undef __
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc
new file mode 100644
index 0000000000..3aeee8a154
--- /dev/null
+++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc
@@ -0,0 +1,633 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler_riscv64.h"
+
+#include "base/bit_utils_iterator.h"
+#include "dwarf/register.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "indirect_reference_table.h"
+#include "lock_word.h"
+#include "managed_register_riscv64.h"
+#include "offsets.h"
+#include "thread.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+static constexpr size_t kSpillSize = 8;  // Both GPRs and FPRs are spilled as 64-bit values.
+
+static std::pair<uint32_t, uint32_t> GetCoreAndFpSpillMasks(
+ ArrayRef<const ManagedRegister> callee_save_regs) {
+ uint32_t core_spill_mask = 0u;
+ uint32_t fp_spill_mask = 0u;
+ for (ManagedRegister r : callee_save_regs) {
+ Riscv64ManagedRegister reg = r.AsRiscv64();
+ if (reg.IsXRegister()) {
+ core_spill_mask |= 1u << reg.AsXRegister();
+ } else {
+ DCHECK(reg.IsFRegister());
+ fp_spill_mask |= 1u << reg.AsFRegister();
+ }
+ }
+ DCHECK_EQ(callee_save_regs.size(),
+ dchecked_integral_cast<size_t>(POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask)));
+ return {core_spill_mask, fp_spill_mask};
+}
+
+#define __ asm_.
+
+Riscv64JNIMacroAssembler::~Riscv64JNIMacroAssembler() {
+}
+
+void Riscv64JNIMacroAssembler::FinalizeCode() {
+ __ FinalizeCode();
+}
+
+void Riscv64JNIMacroAssembler::BuildFrame(size_t frame_size,
+ ManagedRegister method_reg,
+ ArrayRef<const ManagedRegister> callee_save_regs) {
+ // Increase frame to required size.
+ DCHECK_ALIGNED(frame_size, kStackAlignment);
+ // Must at least have space for Method* if we're going to spill it.
+ DCHECK_GE(frame_size,
+ (callee_save_regs.size() + (method_reg.IsRegister() ? 1u : 0u)) * kSpillSize);
+ IncreaseFrameSize(frame_size);
+
+ // Save callee-saves.
+ auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs);
+ size_t offset = frame_size;
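+ // Spill slots are assigned from the top of the frame downwards: RA first (if present),
+ // then the remaining core registers, then the FP registers.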
+ if ((core_spill_mask & (1u << RA)) != 0u) {
+ offset -= kSpillSize;
+ __ Stored(RA, SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Core(RA), offset);
+ }
+ for (uint32_t reg : HighToLowBits(core_spill_mask & ~(1u << RA))) {
+ offset -= kSpillSize;
+ __ Stored(enum_cast<XRegister>(reg), SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg)), offset);
+ }
+ for (uint32_t reg : HighToLowBits(fp_spill_mask)) {
+ offset -= kSpillSize;
+ __ FStored(enum_cast<FRegister>(reg), SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg)), offset);
+ }
+
+ if (method_reg.IsRegister()) {
+ // Write ArtMethod*.
+ DCHECK_EQ(A0, method_reg.AsRiscv64().AsXRegister());
+ __ Stored(A0, SP, 0);
+ }
+}
+
+void Riscv64JNIMacroAssembler::RemoveFrame(size_t frame_size,
+ ArrayRef<const ManagedRegister> callee_save_regs,
+ [[maybe_unused]] bool may_suspend) {
+ cfi().RememberState();
+
+ // Restore callee-saves.
+ auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs);
+ size_t offset = frame_size - callee_save_regs.size() * kSpillSize;
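+ // Restores mirror the spill order in `BuildFrame()`: FP registers first from the bottom
+ // of the callee-save area, then core registers, with RA last at the top.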
+ for (uint32_t reg : LowToHighBits(fp_spill_mask)) {
+ __ FLoadd(enum_cast<FRegister>(reg), SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg)));
+ offset += kSpillSize;
+ }
+ for (uint32_t reg : LowToHighBits(core_spill_mask & ~(1u << RA))) {
+ __ Loadd(enum_cast<XRegister>(reg), SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg)));
+ offset += kSpillSize;
+ }
+ if ((core_spill_mask & (1u << RA)) != 0u) {
+ __ Loadd(RA, SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Core(RA));
+ offset += kSpillSize;
+ }
+ DCHECK_EQ(offset, frame_size);
+
+ // Decrease the frame size.
+ DecreaseFrameSize(frame_size);
+
+ // Return to RA.
+ __ Ret();
+
+ // The CFI should be restored for any code that follows the exit block.
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(frame_size);
+}
+
+void Riscv64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
+ if (adjust != 0u) {
+ CHECK_ALIGNED(adjust, kStackAlignment);
+ int64_t adjustment = dchecked_integral_cast<int64_t>(adjust);
+ __ AddConst64(SP, SP, -adjustment);
+ __ cfi().AdjustCFAOffset(adjustment);
+ }
+}
+
+void Riscv64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
+ if (adjust != 0u) {
+ CHECK_ALIGNED(adjust, kStackAlignment);
+ int64_t adjustment = dchecked_integral_cast<int64_t>(adjust);
+ __ AddConst64(SP, SP, adjustment);
+ __ cfi().AdjustCFAOffset(-adjustment);
+ }
+}
+
+ManagedRegister Riscv64JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
+ DCHECK(src.AsRiscv64().IsXRegister());
+ DCHECK(size == 4u || size == 8u) << size;
+ return src;
+}
+
+void Riscv64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
+ Store(Riscv64ManagedRegister::FromXRegister(SP), MemberOffset(offs.Int32Value()), m_src, size);
+}
+
+void Riscv64JNIMacroAssembler::Store(ManagedRegister m_base,
+ MemberOffset offs,
+ ManagedRegister m_src,
+ size_t size) {
+ Riscv64ManagedRegister base = m_base.AsRiscv64();
+ Riscv64ManagedRegister src = m_src.AsRiscv64();
+ if (src.IsXRegister()) {
+ if (size == 4u) {
+ __ Storew(src.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+ } else {
+ CHECK_EQ(8u, size);
+ __ Stored(src.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+ }
+ } else {
+ CHECK(src.IsFRegister()) << src;
+ if (size == 4u) {
+ __ FStorew(src.AsFRegister(), base.AsXRegister(), offs.Int32Value());
+ } else {
+ CHECK_EQ(8u, size);
+ __ FStored(src.AsFRegister(), base.AsXRegister(), offs.Int32Value());
+ }
+ }
+}
+
+void Riscv64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
+ Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP);
+ Store(sp, MemberOffset(offs.Int32Value()), m_src, static_cast<size_t>(kRiscv64PointerSize));
+}
+
+void Riscv64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) {
+ XRegister src = SP;
+ ScratchRegisterScope srs(&asm_);
+ if (tag_sp) {
+ XRegister tmp = srs.AllocateXRegister();
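+ // Set the tag bit (bit 1) in a scratch register so that SP itself stays untouched.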
+ __ Ori(tmp, SP, 0x2);
+ src = tmp;
+ }
+ __ Stored(src, TR, offs.Int32Value());
+}
+
+void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, FrameOffset offs, size_t size) {
+ Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP);
+ Load(m_dest, sp, MemberOffset(offs.Int32Value()), size);
+}
+
+void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest,
+ ManagedRegister m_base,
+ MemberOffset offs,
+ size_t size) {
+ Riscv64ManagedRegister base = m_base.AsRiscv64();
+ Riscv64ManagedRegister dest = m_dest.AsRiscv64();
+ if (dest.IsXRegister()) {
+ if (size == 4u) {
+ // The riscv64 native calling convention specifies that integers narrower than XLEN (64)
+ // bits are "widened according to the sign of their type up to 32 bits, then sign-extended
+ // to XLEN bits." The managed ABI already passes integral values this way in registers
+ // and correctly widened to 32 bits on the stack. The `Load()` must sign-extend narrower
+ // types here to pass integral values correctly to the native call.
+ // For `float` args, the upper 32 bits are undefined, so this is fine for them as well.
+ __ Loadw(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+ } else {
+ CHECK_EQ(8u, size);
+ __ Loadd(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+ }
+ } else {
+ CHECK(dest.IsFRegister()) << dest;
+ if (size == 4u) {
+ __ FLoadw(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value());
+ } else {
+ CHECK_EQ(8u, size);
+ __ FLoadd(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value());
+ }
+ }
+}
+
+void Riscv64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dest, ThreadOffset64 offs) {
+ Riscv64ManagedRegister tr = Riscv64ManagedRegister::FromXRegister(TR);
+ Load(m_dest, tr, MemberOffset(offs.Int32Value()), static_cast<size_t>(kRiscv64PointerSize));
+}
+
+void Riscv64JNIMacroAssembler::LoadGcRootWithoutReadBarrier(ManagedRegister m_dest,
+ ManagedRegister m_base,
+ MemberOffset offs) {
+ Riscv64ManagedRegister base = m_base.AsRiscv64();
+ Riscv64ManagedRegister dest = m_dest.AsRiscv64();
+ __ Loadwu(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+}
+
+void Riscv64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) {
+ size_t arg_count = dests.size();
+ DCHECK_EQ(arg_count, srcs.size());
+ DCHECK_EQ(arg_count, refs.size());
+
+ auto get_mask = [](ManagedRegister reg) -> uint64_t {
+ Riscv64ManagedRegister riscv64_reg = reg.AsRiscv64();
+ if (riscv64_reg.IsXRegister()) {
+ size_t core_reg_number = static_cast<size_t>(riscv64_reg.AsXRegister());
+ DCHECK_LT(core_reg_number, 32u);
+ return UINT64_C(1) << core_reg_number;
+ } else {
+ DCHECK(riscv64_reg.IsFRegister());
+ size_t fp_reg_number = static_cast<size_t>(riscv64_reg.AsFRegister());
+ DCHECK_LT(fp_reg_number, 32u);
+ return (UINT64_C(1) << 32u) << fp_reg_number;
+ }
+ };
+
+ // Collect registers to move while storing/copying args to stack slots.
+ // Convert processed references to `jobject`.
+ uint64_t src_regs = 0u;
+ uint64_t dest_regs = 0u;
+ for (size_t i = 0; i != arg_count; ++i) {
+ const ArgumentLocation& src = srcs[i];
+ const ArgumentLocation& dest = dests[i];
+ const FrameOffset ref = refs[i];
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_EQ(src.GetSize(), kObjectReferenceSize);
+ DCHECK_EQ(dest.GetSize(), static_cast<size_t>(kRiscv64PointerSize));
+ } else {
+ DCHECK(src.GetSize() == 4u || src.GetSize() == 8u) << src.GetSize();
+ DCHECK(dest.GetSize() == 4u || dest.GetSize() == 8u) << dest.GetSize();
+ DCHECK_LE(src.GetSize(), dest.GetSize());
+ }
+ if (dest.IsRegister()) {
+ if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) {
+ // No move is necessary but we may need to convert a reference to a `jobject`.
+ if (ref != kInvalidReferenceOffset) {
+ CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ }
+ } else {
+ if (src.IsRegister()) {
+ src_regs |= get_mask(src.GetRegister());
+ }
+ dest_regs |= get_mask(dest.GetRegister());
+ }
+ } else {
+ ScratchRegisterScope srs(&asm_);
+ Riscv64ManagedRegister reg = src.IsRegister()
+ ? src.GetRegister().AsRiscv64()
+ : Riscv64ManagedRegister::FromXRegister(srs.AllocateXRegister());
+ if (!src.IsRegister()) {
+ if (ref != kInvalidReferenceOffset) {
+ // We're loading the reference only for comparison with null, so it does not matter
+ // whether we sign- or zero-extend, but let's zero-extend the reference correctly anyway.
+ __ Loadwu(reg.AsRiscv64().AsXRegister(), SP, src.GetFrameOffset().SizeValue());
+ } else {
+ Load(reg, src.GetFrameOffset(), src.GetSize());
+ }
+ }
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_NE(i, 0u);
+ CreateJObject(reg, ref, reg, /*null_allowed=*/ true);
+ }
+ Store(dest.GetFrameOffset(), reg, dest.GetSize());
+ }
+ }
+
+ // Fill destination registers.
+ // There should be no cycles, so this simple algorithm should make progress.
+ while (dest_regs != 0u) {
+ uint64_t old_dest_regs = dest_regs;
+ for (size_t i = 0; i != arg_count; ++i) {
+ const ArgumentLocation& src = srcs[i];
+ const ArgumentLocation& dest = dests[i];
+ const FrameOffset ref = refs[i];
+ if (!dest.IsRegister()) {
+ continue; // Stored in first loop above.
+ }
+ uint64_t dest_reg_mask = get_mask(dest.GetRegister());
+ if ((dest_reg_mask & dest_regs) == 0u) {
+ continue; // Equals source, or already filled in one of previous iterations.
+ }
+ if ((dest_reg_mask & src_regs) != 0u) {
+ continue; // Cannot clobber this register yet.
+ }
+ if (src.IsRegister()) {
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_NE(i, 0u); // The `this` arg remains in the same register (handled above).
+ CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ true);
+ } else {
+ Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
+ }
+ src_regs &= ~get_mask(src.GetRegister()); // Allow clobbering source register.
+ } else {
+ Load(dest.GetRegister(), src.GetFrameOffset(), src.GetSize());
+ // No `jobject` conversion needed. There are enough arg registers in managed ABI
+ // to hold all references that yield a register arg `jobject` in native ABI.
+ DCHECK_EQ(ref, kInvalidReferenceOffset);
+ }
+ dest_regs &= ~get_mask(dest.GetRegister()); // Destination register was filled.
+ }
+ CHECK_NE(old_dest_regs, dest_regs);
+ DCHECK_EQ(0u, dest_regs & ~old_dest_regs);
+ }
+}
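The register-filling loop above is a mask-based parallel-move resolver. A minimal standalone sketch of the same idea, assuming each source register feeds at most one destination and no pending move has dest == src (the first loop already handles those cases), is shown here; it is illustrative only, not part of the patch:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct PendingMove { int dest; int src; };  // Register numbers 0..63.

    // Emit moves in an order that never clobbers a still-needed source register.
    // Without cycles, every pass retires at least one move, so the loop terminates.
    std::vector<PendingMove> ResolveParallelMoves(std::vector<PendingMove> pending) {
      uint64_t src_mask = 0u;
      for (const PendingMove& m : pending) {
        src_mask |= UINT64_C(1) << m.src;
      }
      std::vector<PendingMove> emitted;
      while (!pending.empty()) {
        size_t old_size = pending.size();
        for (size_t i = 0; i != pending.size();) {
          if ((src_mask & (UINT64_C(1) << pending[i].dest)) == 0u) {
            emitted.push_back(pending[i]);                 // Destination is free; move now.
            src_mask &= ~(UINT64_C(1) << pending[i].src);  // Source value has been consumed.
            pending.erase(pending.begin() + i);
          } else {
            ++i;  // Destination still holds a pending source; retry on the next pass.
          }
        }
        if (pending.size() == old_size) {
          break;  // Only possible with a cycle; the JNI argument shuffle has none.
        }
      }
      return emitted;
    }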
+
+void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, ManagedRegister m_src, size_t size) {
+ // Note: This function is used only for moving between GPRs.
+ // FP argument registers hold the same arguments in managed and native ABIs.
+ DCHECK(size == 4u || size == 8u) << size;
+ Riscv64ManagedRegister dest = m_dest.AsRiscv64();
+ Riscv64ManagedRegister src = m_src.AsRiscv64();
+ DCHECK(dest.IsXRegister());
+ DCHECK(src.IsXRegister());
+ if (!dest.Equals(src)) {
+ __ Mv(dest.AsXRegister(), src.AsXRegister());
+ }
+}
+
+void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, size_t value) {
+ DCHECK(m_dest.AsRiscv64().IsXRegister());
+ __ LoadConst64(m_dest.AsRiscv64().AsXRegister(), dchecked_integral_cast<int64_t>(value));
+}
+
+void Riscv64JNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg,
+ [[maybe_unused]] size_t size) {
+ LOG(FATAL) << "The result is already sign-extended in the native ABI.";
+ UNREACHABLE();
+}
+
+void Riscv64JNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg,
+ [[maybe_unused]] size_t size) {
+ LOG(FATAL) << "The result is already zero-extended in the native ABI.";
+ UNREACHABLE();
+}
+
+void Riscv64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+ DCHECK(dest.AsRiscv64().IsXRegister());
+ __ Mv(dest.AsRiscv64().AsXRegister(), TR);
+}
+
+void Riscv64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
+ __ Stored(TR, SP, offset.Int32Value());
+}
+
+void Riscv64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m_reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) {
+ // This implements the fast-path of `Thread::DecodeJObject()`.
+ constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask();
+ DCHECK(IsInt<12>(kGlobalOrWeakGlobalMask));
+ constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask();
+ DCHECK(IsInt<12>(kIndirectRefKindMask));
+ XRegister reg = m_reg.AsRiscv64().AsXRegister();
+ __ Beqz(reg, Riscv64JNIMacroLabel::Cast(resume)->AsRiscv64()); // Skip test and load for null.
+ __ Andi(TMP, reg, kGlobalOrWeakGlobalMask);
+ __ Bnez(TMP, Riscv64JNIMacroLabel::Cast(slow_path)->AsRiscv64());
+ __ Andi(reg, reg, ~kIndirectRefKindMask);
+ __ Loadw(reg, reg, 0);
+}
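For reference, a rough standalone sketch of the fast path emitted above, with hypothetical example constants standing in for IndirectReferenceTable::GetGlobalOrWeakGlobalMask() and GetIndirectRefKindMask() (the real values come from the runtime):

    #include <cstdint>

    constexpr uint64_t kGlobalOrWeakGlobalMaskExample = 0x2;  // Hypothetical stand-in.
    constexpr uint64_t kIndirectRefKindMaskExample = 0x3;     // Hypothetical stand-in.

    // Returns true if the reference was decoded on the fast path,
    // false if the slow path (Thread::DecodeJObject()) is required.
    bool DecodeJObjectFastPath(uint64_t ref, uint32_t* out_compressed_root) {
      if (ref == 0u) {
        *out_compressed_root = 0u;  // Null decodes to null; skip the test and load.
        return true;
      }
      if ((ref & kGlobalOrWeakGlobalMaskExample) != 0u) {
        return false;  // (Weak) global reference: defer to the runtime.
      }
      uint64_t entry = ref & ~kIndirectRefKindMaskExample;  // Strip the kind bits.
      *out_compressed_root = *reinterpret_cast<const uint32_t*>(entry);  // Load the GC root.
      return true;
    }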
+
+void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister m_src,
+ [[maybe_unused]] bool could_be_null) {
+ // TODO: not validating references.
+}
+
+void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src,
+ [[maybe_unused]] bool could_be_null) {
+ // TODO: not validating references.
+}
+
+void Riscv64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) {
+ Riscv64ManagedRegister base = m_base.AsRiscv64();
+ CHECK(base.IsXRegister()) << base;
+ ScratchRegisterScope srs(&asm_);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadd(tmp, base.AsXRegister(), offs.Int32Value());
+ __ Jr(tmp);
+}
+
+void Riscv64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) {
+ Riscv64ManagedRegister base = m_base.AsRiscv64();
+ CHECK(base.IsXRegister()) << base;
+ __ Loadd(RA, base.AsXRegister(), offs.Int32Value());
+ __ Jalr(RA);
+}
+
+void Riscv64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) {
+ Call(Riscv64ManagedRegister::FromXRegister(TR), offset);
+}
+
+void Riscv64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label,
+ ArrayRef<const ManagedRegister> scratch_regs) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>();
+ constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock);
+
+ DCHECK_GE(scratch_regs.size(), 2u);
+ XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister();
+ XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister();
+
+ // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+ Riscv64Label retry;
+ __ Bind(&retry);
+ static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address.
+ __ LrW(scratch, TR, AqRl::kNone);
+ __ Li(scratch2, kNativeStateValue);
+ // If any flags are set, go to the slow path.
+ static_assert(kRunnableStateValue == 0u);
+ __ Bnez(scratch, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+ __ ScW(scratch, scratch2, TR, AqRl::kRelease);
+ __ Bnez(scratch, &retry);
+
+ // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
+ __ Stored(Zero, TR, thread_held_mutex_mutator_lock_offset.Int32Value());
+}
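Conceptually, the LR/SC loop above is a release-ordered compare-and-swap on the packed 32-bit state-and-flags word: Runnable with no flags set (zero) is swapped to the Native state value, and anything else falls through to the slow path. A portable sketch under that assumption:

    #include <atomic>
    #include <cstdint>

    // Returns true on success; false means flags were set or the state was not
    // Runnable-with-no-flags, so the slow path must handle the transition.
    bool TransitionRunnableToNativeFast(std::atomic<uint32_t>& state_and_flags,
                                        uint32_t native_state_value) {
      uint32_t expected = 0u;  // kRunnableStateValue with no flag bits set.
      // The release ordering pairs with the acquire CAS used on the way back to Runnable.
      return state_and_flags.compare_exchange_strong(expected,
                                                     native_state_value,
                                                     std::memory_order_release,
                                                     std::memory_order_relaxed);
    }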
+
+void Riscv64JNIMacroAssembler::TryToTransitionFromNativeToRunnable(
+ JNIMacroLabel* label,
+ ArrayRef<const ManagedRegister> scratch_regs,
+ ManagedRegister return_reg) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>();
+ constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock);
+ constexpr ThreadOffset64 thread_mutator_lock_offset =
+ Thread::MutatorLockOffset<kRiscv64PointerSize>();
+
+ DCHECK_GE(scratch_regs.size(), 2u);
+ DCHECK(!scratch_regs[0].AsRiscv64().Overlaps(return_reg.AsRiscv64()));
+ XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister();
+ DCHECK(!scratch_regs[1].AsRiscv64().Overlaps(return_reg.AsRiscv64()));
+ XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister();
+
+ // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags.
+ Riscv64Label retry;
+ __ Bind(&retry);
+ static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address.
+ __ LrW(scratch, TR, AqRl::kAcquire);
+ __ Li(scratch2, kNativeStateValue);
+ // If any flags are set, or the state is not Native, go to the slow path.
+ // (While the thread can theoretically transition between different Suspended states,
+ // it would be very unexpected to see a state other than Native at this point.)
+ __ Bne(scratch, scratch2, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+ static_assert(kRunnableStateValue == 0u);
+ __ ScW(scratch, Zero, TR, AqRl::kNone);
+ __ Bnez(scratch, &retry);
+
+ // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock.
+ __ Loadd(scratch, TR, thread_mutator_lock_offset.Int32Value());
+ __ Stored(scratch, TR, thread_held_mutex_mutator_lock_offset.Int32Value());
+}
+
+void Riscv64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
+ ScratchRegisterScope srs(&asm_);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value());
+ DCHECK(IsInt<12>(dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags())));
+ __ Andi(tmp, tmp, dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags()));
+ __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+}
+
+void Riscv64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
+ ScratchRegisterScope srs(&asm_);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadd(tmp, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value());
+ __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+}
+
+void Riscv64JNIMacroAssembler::DeliverPendingException() {
+ // Pass exception object as argument.
+ // Don't care about preserving A0 as this won't return.
+ // Note: The scratch register from `ExceptionPoll()` may have been clobbered.
+ __ Loadd(A0, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value());
+ __ Loadd(RA, TR, QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, pDeliverException).Int32Value());
+ __ Jalr(RA);
+ // Call should never return.
+ __ Unimp();
+}
+
+std::unique_ptr<JNIMacroLabel> Riscv64JNIMacroAssembler::CreateLabel() {
+ return std::unique_ptr<JNIMacroLabel>(new Riscv64JNIMacroLabel());
+}
+
+void Riscv64JNIMacroAssembler::Jump(JNIMacroLabel* label) {
+ CHECK(label != nullptr);
+ __ J(down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()));
+}
+
+void Riscv64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
+ CHECK(label != nullptr);
+
+ DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+
+ ScratchRegisterScope srs(&asm_);
+ XRegister test_reg = srs.AllocateXRegister();
+ int32_t is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value();
+ __ Loadw(test_reg, TR, is_gc_marking_offset);
+ switch (cond) {
+ case JNIMacroUnaryCondition::kZero:
+ __ Beqz(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()));
+ break;
+ case JNIMacroUnaryCondition::kNotZero:
+ __ Bnez(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()));
+ break;
+ default:
+ LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
+ UNREACHABLE();
+ }
+}
+
+void Riscv64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref,
+ JNIMacroLabel* label,
+ JNIMacroUnaryCondition cond) {
+ XRegister ref = m_ref.AsRiscv64().AsXRegister();
+ ScratchRegisterScope srs(&asm_);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadw(tmp, ref, mirror::Object::MonitorOffset().Int32Value());
+ // Move the bit we want to check to the sign bit, so that we can use BGEZ/BLTZ
+ // to check it. Extracting the bit for BEQZ/BNEZ would require one more instruction.
+ static_assert(LockWord::kMarkBitStateSize == 1u);
+ __ Slliw(tmp, tmp, 31 - LockWord::kMarkBitStateShift);
+ switch (cond) {
+ case JNIMacroUnaryCondition::kZero:
+ __ Bgez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+ break;
+ case JNIMacroUnaryCondition::kNotZero:
+ __ Bltz(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+ break;
+ default:
+ LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
+ UNREACHABLE();
+ }
+}
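A small sketch of the sign-bit trick used above, with a hypothetical shift value standing in for LockWord::kMarkBitStateShift (the real value comes from the runtime):

    #include <cstdint>

    constexpr uint32_t kExampleMarkBitShift = 29;  // Hypothetical stand-in for the real shift.

    // After `slliw tmp, tmp, 31 - shift`, the mark bit ends up in bit 31 (the sign bit),
    // so a taken BLTZ means "mark bit set" and a taken BGEZ means "mark bit clear".
    constexpr bool MarkBitViaSignBit(uint32_t lock_word) {
      return static_cast<int32_t>(lock_word << (31 - kExampleMarkBitShift)) < 0;
    }

    static_assert(MarkBitViaSignBit(UINT32_C(1) << kExampleMarkBitShift), "bit set -> negative");
    static_assert(!MarkBitViaSignBit(~(UINT32_C(1) << kExampleMarkBitShift)), "bit clear -> non-negative");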
+
+void Riscv64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+ int32_t small_offset = dchecked_integral_cast<int32_t>(address & 0xfff) -
+ dchecked_integral_cast<int32_t>((address & 0x800) << 1);
+ int64_t remainder = static_cast<int64_t>(address) - small_offset;
+ ScratchRegisterScope srs(&asm_);
+ XRegister tmp = srs.AllocateXRegister();
+ __ LoadConst64(tmp, remainder);
+ __ Lb(tmp, tmp, small_offset);
+ __ Bnez(tmp, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()));
+}
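The split above turns the low 12 bits of the address into a sign-extended `lb` offset and rounds the rest to a value that `LoadConst64()` can materialize cheaply. A worked example, chosen to show the bit-11 borrow and matching the `0x456789ab` case exercised in the new test file later in this patch:

    #include <cstdint>

    constexpr uint64_t kAddr = 0x456789abu;
    constexpr int32_t kSmallOffset =
        static_cast<int32_t>(kAddr & 0xfff) - static_cast<int32_t>((kAddr & 0x800) << 1);
    constexpr int64_t kRemainder = static_cast<int64_t>(kAddr) - kSmallOffset;

    static_assert(kSmallOffset == -0x655, "low 12 bits, sign-extended");
    static_assert(kRemainder == 0x45679000, "rounded base, loadable via LoadConst64/lui");
    static_assert(kRemainder + kSmallOffset == static_cast<int64_t>(kAddr), "recombines exactly");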
+
+void Riscv64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
+ CHECK(label != nullptr);
+ __ Bind(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64());
+}
+
+void Riscv64JNIMacroAssembler::CreateJObject(ManagedRegister m_dest,
+ FrameOffset spilled_reference_offset,
+ ManagedRegister m_ref,
+ bool null_allowed) {
+ Riscv64ManagedRegister dest = m_dest.AsRiscv64();
+ Riscv64ManagedRegister ref = m_ref.AsRiscv64();
+ DCHECK(dest.IsXRegister());
+ DCHECK(ref.IsXRegister());
+
+ Riscv64Label null_label;
+ if (null_allowed) {
+ if (!dest.Equals(ref)) {
+ __ Li(dest.AsXRegister(), 0);
+ }
+ __ Beqz(ref.AsXRegister(), &null_label);
+ }
+ __ AddConst64(dest.AsXRegister(), SP, spilled_reference_offset.Int32Value());
+ if (null_allowed) {
+ __ Bind(&null_label);
+ }
+}
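In C-level terms, the helper above computes the value handed to native code as a `jobject`; a minimal sketch of the intended semantics (not a drop-in implementation):

    #include <cstdint>

    // `sp` is the managed frame's stack pointer; `ref` is the (compressed) reference value.
    // The result is either null or the address of the stack slot where the reference was spilled.
    uintptr_t CreateJObjectValue(uintptr_t sp,
                                 uint32_t spilled_reference_offset,
                                 uint32_t ref,
                                 bool null_allowed) {
      if (null_allowed && ref == 0u) {
        return 0u;  // A null reference must stay a null `jobject`.
      }
      return sp + spilled_reference_offset;  // Pointer to the spilled reference slot.
    }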
+
+#undef __
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.h b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h
new file mode 100644
index 0000000000..79618e2c8e
--- /dev/null
+++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_
+#define ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_
+
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include <android-base/logging.h>
+
+#include "assembler_riscv64.h"
+#include "base/arena_containers.h"
+#include "base/enums.h"
+#include "base/macros.h"
+#include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+class Riscv64JNIMacroAssembler : public JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64> {
+ public:
+ explicit Riscv64JNIMacroAssembler(ArenaAllocator* allocator)
+ : JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64>(allocator) {}
+ ~Riscv64JNIMacroAssembler();
+
+ // Finalize the code.
+ void FinalizeCode() override;
+
+ // Emit code that will create an activation on the stack.
+ void BuildFrame(size_t frame_size,
+ ManagedRegister method_reg,
+ ArrayRef<const ManagedRegister> callee_save_regs) override;
+
+ // Emit code that will remove an activation from the stack.
+ void RemoveFrame(size_t frame_size,
+ ArrayRef<const ManagedRegister> callee_save_regs,
+ bool may_suspend) override;
+
+ void IncreaseFrameSize(size_t adjust) override;
+ void DecreaseFrameSize(size_t adjust) override;
+
+ ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override;
+
+ // Store routines.
+ void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
+ void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
+ void StoreRawPtr(FrameOffset offs, ManagedRegister src) override;
+ void StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) override;
+
+ // Load routines.
+ void Load(ManagedRegister dest, FrameOffset offs, size_t size) override;
+ void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
+ void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override;
+ void LoadGcRootWithoutReadBarrier(ManagedRegister dest,
+ ManagedRegister base,
+ MemberOffset offs) override;
+
+ // Copying routines.
+ void MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) override;
+ void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
+ void Move(ManagedRegister dest, size_t value) override;
+
+ // Sign extension.
+ void SignExtend(ManagedRegister mreg, size_t size) override;
+
+ // Zero extension.
+ void ZeroExtend(ManagedRegister mreg, size_t size) override;
+
+ // Exploit fast access in managed code to Thread::Current().
+ void GetCurrentThread(ManagedRegister dest) override;
+ void GetCurrentThread(FrameOffset offset) override;
+
+ // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path.
+ void DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) override;
+
+ // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+ // know that src may not be null.
+ void VerifyObject(ManagedRegister src, bool could_be_null) override;
+ void VerifyObject(FrameOffset src, bool could_be_null) override;
+
+ // Jump to address held at [base+offset] (used for tail calls).
+ void Jump(ManagedRegister base, Offset offset) override;
+
+ // Call to address held at [base+offset].
+ void Call(ManagedRegister base, Offset offset) override;
+ void CallFromThread(ThreadOffset64 offset) override;
+
+ // Generate fast-path for transition to Native. Go to `label` if any thread flag is set.
+  // The implementation can use `scratch_regs`, which should be callee-save core registers
+ // (already saved before this call) and must preserve all argument registers.
+ void TryToTransitionFromRunnableToNative(JNIMacroLabel* label,
+ ArrayRef<const ManagedRegister> scratch_regs) override;
+
+ // Generate fast-path for transition to Runnable. Go to `label` if any thread flag is set.
+  // The implementation can use `scratch_regs`, which should be core argument registers
+  // not used as return registers, and it must preserve the `return_reg` if any.
+ void TryToTransitionFromNativeToRunnable(JNIMacroLabel* label,
+ ArrayRef<const ManagedRegister> scratch_regs,
+ ManagedRegister return_reg) override;
+
+ // Generate suspend check and branch to `label` if there is a pending suspend request.
+ void SuspendCheck(JNIMacroLabel* label) override;
+
+ // Generate code to check if Thread::Current()->exception_ is non-null
+ // and branch to the `label` if it is.
+ void ExceptionPoll(JNIMacroLabel* label) override;
+ // Deliver pending exception.
+ void DeliverPendingException() override;
+
+ // Create a new label that can be used with Jump/Bind calls.
+ std::unique_ptr<JNIMacroLabel> CreateLabel() override;
+ // Emit an unconditional jump to the label.
+ void Jump(JNIMacroLabel* label) override;
+ // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+ void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+ // Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
+ void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+  // Emit a conditional jump to the label if the byte loaded from the given address is not zero.
+ void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
+ // Code at this offset will serve as the target for the Jump call.
+ void Bind(JNIMacroLabel* label) override;
+
+ private:
+ void CreateJObject(ManagedRegister m_dest,
+ FrameOffset spilled_reference_offset,
+ ManagedRegister m_ref,
+ bool null_allowed);
+
+ ART_FRIEND_TEST(JniMacroAssemblerRiscv64Test, CreateJObject);
+};
+
+class Riscv64JNIMacroLabel final
+ : public JNIMacroLabelCommon<Riscv64JNIMacroLabel,
+ Riscv64Label,
+ InstructionSet::kRiscv64> {
+ public:
+ Riscv64Label* AsRiscv64() {
+ return AsPlatformLabel();
+ }
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_
diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc
new file mode 100644
index 0000000000..004ba9bb7f
--- /dev/null
+++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc
@@ -0,0 +1,959 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <fstream>
+#include <map>
+#include <regex>
+
+#include "gtest/gtest.h"
+
+#include "indirect_reference_table.h"
+#include "lock_word.h"
+#include "jni/quick/calling_convention.h"
+#include "utils/riscv64/jni_macro_assembler_riscv64.h"
+#include "utils/assembler_test_base.h"
+
+#include "base/macros.h"
+#include "base/malloc_arena_pool.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+#define __ assembler_.
+
+class JniMacroAssemblerRiscv64Test : public AssemblerTestBase {
+ public:
+ JniMacroAssemblerRiscv64Test() : pool_(), allocator_(&pool_), assembler_(&allocator_) { }
+
+ protected:
+ InstructionSet GetIsa() override { return InstructionSet::kRiscv64; }
+
+ void DriverStr(const std::string& assembly_text, const std::string& test_name) {
+ assembler_.FinalizeCode();
+ size_t cs = assembler_.CodeSize();
+ std::vector<uint8_t> data(cs);
+ MemoryRegion code(&data[0], data.size());
+ assembler_.CopyInstructions(code);
+ Driver(data, assembly_text, test_name);
+ }
+
+ static Riscv64ManagedRegister AsManaged(XRegister reg) {
+ return Riscv64ManagedRegister::FromXRegister(reg);
+ }
+
+ static Riscv64ManagedRegister AsManaged(FRegister reg) {
+ return Riscv64ManagedRegister::FromFRegister(reg);
+ }
+
+ std::string EmitRet() {
+ __ RemoveFrame(/*frame_size=*/ 0u,
+ /*callee_save_regs=*/ ArrayRef<const ManagedRegister>(),
+ /*may_suspend=*/ false);
+ return "ret\n";
+ }
+
+ static const size_t kWordSize = 4u;
+ static const size_t kDoubleWordSize = 8u;
+
+ MallocArenaPool pool_;
+ ArenaAllocator allocator_;
+ Riscv64JNIMacroAssembler assembler_;
+};
+
+TEST_F(JniMacroAssemblerRiscv64Test, StackFrame) {
+ std::string expected;
+
+ std::unique_ptr<JniCallingConvention> jni_conv = JniCallingConvention::Create(
+ &allocator_,
+ /*is_static=*/ false,
+ /*is_synchronized=*/ false,
+ /*is_fast_native=*/ false,
+ /*is_critical_native=*/ false,
+ /*shorty=*/ "V",
+ InstructionSet::kRiscv64);
+ size_t frame_size = jni_conv->FrameSize();
+ ManagedRegister method_reg = AsManaged(A0);
+ ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
+
+ __ BuildFrame(frame_size, method_reg, callee_save_regs);
+ expected += "addi sp, sp, -208\n"
+ "sd ra, 200(sp)\n"
+ "sd s11, 192(sp)\n"
+ "sd s10, 184(sp)\n"
+ "sd s9, 176(sp)\n"
+ "sd s8, 168(sp)\n"
+ "sd s7, 160(sp)\n"
+ "sd s6, 152(sp)\n"
+ "sd s5, 144(sp)\n"
+ "sd s4, 136(sp)\n"
+ "sd s3, 128(sp)\n"
+ "sd s2, 120(sp)\n"
+ "sd s0, 112(sp)\n"
+ "fsd fs11, 104(sp)\n"
+ "fsd fs10, 96(sp)\n"
+ "fsd fs9, 88(sp)\n"
+ "fsd fs8, 80(sp)\n"
+ "fsd fs7, 72(sp)\n"
+ "fsd fs6, 64(sp)\n"
+ "fsd fs5, 56(sp)\n"
+ "fsd fs4, 48(sp)\n"
+ "fsd fs3, 40(sp)\n"
+ "fsd fs2, 32(sp)\n"
+ "fsd fs1, 24(sp)\n"
+ "fsd fs0, 16(sp)\n"
+ "sd a0, 0(sp)\n";
+
+ __ RemoveFrame(frame_size, callee_save_regs, /*may_suspend=*/ false);
+ expected += "fld fs0, 16(sp)\n"
+ "fld fs1, 24(sp)\n"
+ "fld fs2, 32(sp)\n"
+ "fld fs3, 40(sp)\n"
+ "fld fs4, 48(sp)\n"
+ "fld fs5, 56(sp)\n"
+ "fld fs6, 64(sp)\n"
+ "fld fs7, 72(sp)\n"
+ "fld fs8, 80(sp)\n"
+ "fld fs9, 88(sp)\n"
+ "fld fs10, 96(sp)\n"
+ "fld fs11, 104(sp)\n"
+ "ld s0, 112(sp)\n"
+ "ld s2, 120(sp)\n"
+ "ld s3, 128(sp)\n"
+ "ld s4, 136(sp)\n"
+ "ld s5, 144(sp)\n"
+ "ld s6, 152(sp)\n"
+ "ld s7, 160(sp)\n"
+ "ld s8, 168(sp)\n"
+ "ld s9, 176(sp)\n"
+ "ld s10, 184(sp)\n"
+ "ld s11, 192(sp)\n"
+ "ld ra, 200(sp)\n"
+ "addi sp, sp, 208\n"
+ "ret\n";
+
+ DriverStr(expected, "StackFrame");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, ChangeFrameSize) {
+ std::string expected;
+
+ __ IncreaseFrameSize(128);
+ expected += "addi sp, sp, -128\n";
+ __ DecreaseFrameSize(128);
+ expected += "addi sp, sp, 128\n";
+
+ __ IncreaseFrameSize(0); // No-op
+ __ DecreaseFrameSize(0); // No-op
+
+ __ IncreaseFrameSize(2048);
+ expected += "addi sp, sp, -2048\n";
+ __ DecreaseFrameSize(2048);
+ expected += "addi t6, sp, 2047\n"
+ "addi sp, t6, 1\n";
+
+ __ IncreaseFrameSize(4096);
+ expected += "addi t6, sp, -2048\n"
+ "addi sp, t6, -2048\n";
+ __ DecreaseFrameSize(4096);
+ expected += "lui t6, 1\n"
+ "add sp, sp, t6\n";
+
+ __ IncreaseFrameSize(6 * KB);
+ expected += "addi t6, zero, -3\n"
+ "slli t6, t6, 11\n"
+ "add sp, sp, t6\n";
+ __ DecreaseFrameSize(6 * KB);
+ expected += "addi t6, zero, 3\n"
+ "slli t6, t6, 11\n"
+ "add sp, sp, t6\n";
+
+ __ IncreaseFrameSize(6 * KB + 16);
+ expected += "lui t6, 0xffffe\n"
+ "addiw t6, t6, 2048-16\n"
+ "add sp, sp, t6\n";
+ __ DecreaseFrameSize(6 * KB + 16);
+ expected += "lui t6, 2\n"
+ "addiw t6, t6, 16-2048\n"
+ "add sp, sp, t6\n";
+
+ DriverStr(expected, "ChangeFrameSize");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, Store) {
+ std::string expected;
+
+ __ Store(FrameOffset(0), AsManaged(A0), kWordSize);
+ expected += "sw a0, 0(sp)\n";
+ __ Store(FrameOffset(2048), AsManaged(S0), kDoubleWordSize);
+ expected += "addi t6, sp, 0x7f8\n"
+ "sd s0, 8(t6)\n";
+
+ __ Store(AsManaged(A1), MemberOffset(256), AsManaged(S2), kDoubleWordSize);
+ expected += "sd s2, 256(a1)\n";
+ __ Store(AsManaged(S3), MemberOffset(4 * KB), AsManaged(T1), kWordSize);
+ expected += "lui t6, 1\n"
+ "add t6, t6, s3\n"
+ "sw t1, 0(t6)\n";
+
+ __ Store(AsManaged(A3), MemberOffset(384), AsManaged(FA5), kDoubleWordSize);
+ expected += "fsd fa5, 384(a3)\n";
+ __ Store(AsManaged(S4), MemberOffset(4 * KB + 16), AsManaged(FT10), kWordSize);
+ expected += "lui t6, 1\n"
+ "add t6, t6, s4\n"
+ "fsw ft10, 16(t6)\n";
+
+ __ StoreRawPtr(FrameOffset(128), AsManaged(A7));
+ expected += "sd a7, 128(sp)\n";
+ __ StoreRawPtr(FrameOffset(6 * KB), AsManaged(S11));
+ expected += "lui t6, 2\n"
+ "add t6, t6, sp\n"
+ "sd s11, -2048(t6)\n";
+
+ __ StoreStackPointerToThread(ThreadOffset64(512), /*tag_sp=*/ false);
+ expected += "sd sp, 512(s1)\n";
+ __ StoreStackPointerToThread(ThreadOffset64(3 * KB), /*tag_sp=*/ true);
+ expected += "ori t6, sp, 0x2\n"
+ "addi t5, s1, 0x7f8\n"
+ "sd t6, 0x408(t5)\n";
+
+ DriverStr(expected, "Store");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, Load) {
+ std::string expected;
+
+ __ Load(AsManaged(A0), FrameOffset(0), kWordSize);
+ expected += "lw a0, 0(sp)\n";
+ __ Load(AsManaged(S0), FrameOffset(2048), kDoubleWordSize);
+ expected += "addi t6, sp, 0x7f8\n"
+ "ld s0, 8(t6)\n";
+
+ __ Load(AsManaged(S2), AsManaged(A1), MemberOffset(256), kDoubleWordSize);
+ expected += "ld s2, 256(a1)\n";
+ __ Load(AsManaged(T1), AsManaged(S3), MemberOffset(4 * KB), kWordSize);
+ expected += "lui t6, 1\n"
+ "add t6, t6, s3\n"
+ "lw t1, 0(t6)\n";
+
+ __ Load(AsManaged(FA5), AsManaged(A3), MemberOffset(384), kDoubleWordSize);
+ expected += "fld fa5, 384(a3)\n";
+ __ Load(AsManaged(FT10), AsManaged(S4), MemberOffset(4 * KB + 16), kWordSize);
+ expected += "lui t6, 1\n"
+ "add t6, t6, s4\n"
+ "flw ft10, 16(t6)\n";
+
+ __ LoadRawPtrFromThread(AsManaged(A7), ThreadOffset64(512));
+ expected += "ld a7, 512(s1)\n";
+ __ LoadRawPtrFromThread(AsManaged(S11), ThreadOffset64(3 * KB));
+ expected += "addi t6, s1, 0x7f8\n"
+ "ld s11, 0x408(t6)\n";
+
+ __ LoadGcRootWithoutReadBarrier(AsManaged(T0), AsManaged(A0), MemberOffset(0));
+ expected += "lwu t0, 0(a0)\n";
+ __ LoadGcRootWithoutReadBarrier(AsManaged(T1), AsManaged(S2), MemberOffset(0x800));
+ expected += "addi t6, s2, 0x7f8\n"
+ "lwu t1, 8(t6)\n";
+
+ DriverStr(expected, "Load");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, CreateJObject) {
+ std::string expected;
+
+ __ CreateJObject(AsManaged(A0), FrameOffset(8), AsManaged(A0), /*null_allowed=*/ true);
+ expected += "beqz a0, 1f\n"
+ "addi a0, sp, 8\n"
+ "1:\n";
+ __ CreateJObject(AsManaged(A1), FrameOffset(12), AsManaged(A1), /*null_allowed=*/ false);
+ expected += "addi a1, sp, 12\n";
+ __ CreateJObject(AsManaged(A2), FrameOffset(16), AsManaged(A3), /*null_allowed=*/ true);
+ expected += "li a2, 0\n"
+ "beqz a3, 2f\n"
+ "addi a2, sp, 16\n"
+ "2:\n";
+ __ CreateJObject(AsManaged(A4), FrameOffset(2048), AsManaged(A5), /*null_allowed=*/ false);
+ expected += "addi t6, sp, 2047\n"
+ "addi a4, t6, 1\n";
+
+ DriverStr(expected, "CreateJObject");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) {
+ std::string expected;
+
+ static constexpr FrameOffset kInvalidReferenceOffset =
+      JNIMacroAssembler<kRiscv64PointerSize>::kInvalidReferenceOffset;
+ static constexpr size_t kNativePointerSize = static_cast<size_t>(kRiscv64PointerSize);
+ static constexpr size_t kFloatSize = 4u;
+ static constexpr size_t kXlenInBytes = 8u; // Used for integral args and `double`.
+
+ // Normal or @FastNative static with parameters "LIJIJILJI".
+ // Note: This shall not spill references to the stack. The JNI compiler spills
+  // references in a separate initial pass before moving arguments and creating `jobject`s.
+ ArgumentLocation move_dests1[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass`
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kNativePointerSize),
+ ArgumentLocation(FrameOffset(8), kXlenInBytes),
+ ArgumentLocation(FrameOffset(16), kXlenInBytes),
+ };
+ ArgumentLocation move_srcs1[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass`
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ ArgumentLocation(FrameOffset(76), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(84), kVRegSize),
+ };
+ FrameOffset move_refs1[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(40),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(72),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1),
+ ArrayRef<ArgumentLocation>(move_srcs1),
+ ArrayRef<FrameOffset>(move_refs1));
+ expected += "beqz a7, 1f\n"
+ "addi a7, sp, 72\n"
+ "1:\n"
+ "sd a7, 0(sp)\n"
+ "ld t6, 76(sp)\n"
+ "sd t6, 8(sp)\n"
+ "lw t6, 84(sp)\n"
+ "sd t6, 16(sp)\n"
+ "mv a7, a6\n"
+ "mv a6, a5\n"
+ "mv a5, a4\n"
+ "mv a4, a3\n"
+ "mv a3, a2\n"
+ "li a2, 0\n"
+ "beqz a1, 2f\n"
+ "add a2, sp, 40\n"
+ "2:\n"
+ "mv a1, a0\n";
+
+ // Normal or @FastNative static with parameters "LIJIJILJI" - spill references.
+ ArgumentLocation move_dests1_spill_refs[] = {
+ ArgumentLocation(FrameOffset(40), kVRegSize),
+ ArgumentLocation(FrameOffset(72), kVRegSize),
+ };
+ ArgumentLocation move_srcs1_spill_refs[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ };
+ FrameOffset move_refs1_spill_refs[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1_spill_refs),
+ ArrayRef<ArgumentLocation>(move_srcs1_spill_refs),
+ ArrayRef<FrameOffset>(move_refs1_spill_refs));
+ expected += "sw a1, 40(sp)\n"
+ "sw a7, 72(sp)\n";
+
+ // Normal or @FastNative with parameters "LLIJIJIJLI" (first is `this`).
+ // Note: This shall not spill references to the stack. The JNI compiler spills
+  // references in a separate initial pass before moving arguments and creating `jobject`s.
+ ArgumentLocation move_dests2[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kXlenInBytes),
+ ArgumentLocation(FrameOffset(8), kNativePointerSize),
+ ArgumentLocation(FrameOffset(16), kXlenInBytes),
+ };
+ ArgumentLocation move_srcs2[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ ArgumentLocation(FrameOffset(76), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(84), kVRegSize),
+ ArgumentLocation(FrameOffset(88), kVRegSize),
+ };
+ FrameOffset move_refs2[] {
+ FrameOffset(40),
+ FrameOffset(44),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(84),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests2),
+ ArrayRef<ArgumentLocation>(move_srcs2),
+ ArrayRef<FrameOffset>(move_refs2));
+ // Args in A1-A7 do not move but references are converted to `jobject`.
+ expected += "addi a1, sp, 40\n"
+ "beqz a2, 1f\n"
+ "addi a2, sp, 44\n"
+ "1:\n"
+ "ld t6, 76(sp)\n"
+ "sd t6, 0(sp)\n"
+ "lwu t6, 84(sp)\n"
+ "beqz t6, 2f\n"
+ "addi t6, sp, 84\n"
+ "2:\n"
+ "sd t6, 8(sp)\n"
+ "lw t6, 88(sp)\n"
+ "sd t6, 16(sp)\n";
+
+ // Normal or @FastNative static with parameters "FDFDFDFDFDIJIJIJL".
+ ArgumentLocation move_dests3[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass`
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kXlenInBytes),
+ ArgumentLocation(FrameOffset(8), kXlenInBytes),
+ ArgumentLocation(FrameOffset(16), kNativePointerSize),
+ };
+ ArgumentLocation move_srcs3[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass`
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(88), kVRegSize),
+ ArgumentLocation(FrameOffset(92), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize),
+ };
+ FrameOffset move_refs3[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(88),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests3),
+ ArrayRef<ArgumentLocation>(move_srcs3),
+ ArrayRef<FrameOffset>(move_refs3));
+ // FP args in FA0-FA7 do not move.
+ expected += "sd a5, 0(sp)\n"
+ "sd a6, 8(sp)\n"
+ "beqz a7, 1f\n"
+ "addi a7, sp, 88\n"
+ "1:\n"
+ "sd a7, 16(sp)\n"
+ "mv a5, a2\n"
+ "mv a6, a3\n"
+ "mv a7, a4\n"
+ "lw a2, 88(sp)\n"
+ "ld a3, 92(sp)\n"
+ "mv a4, a1\n"
+ "mv a1, a0\n";
+
+ // @CriticalNative with parameters "DFDFDFDFIDJIJFDIIJ".
+ ArgumentLocation move_dests4[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kFloatSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kXlenInBytes),
+ ArgumentLocation(FrameOffset(8), kXlenInBytes),
+ };
+ ArgumentLocation move_srcs4[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize),
+ ArgumentLocation(FrameOffset(92), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(112), kVRegSize),
+ ArgumentLocation(FrameOffset(116), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize),
+ };
+ FrameOffset move_refs4[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests4),
+ ArrayRef<ArgumentLocation>(move_srcs4),
+ ArrayRef<FrameOffset>(move_refs4));
+ // FP args in FA0-FA7 and integral args in A2-A4 do not move.
+ expected += "sd a6, 0(sp)\n"
+ "sd a7, 8(sp)\n"
+ "mv a0, a1\n"
+ "ld a1, 92(sp)\n"
+ "ld a6, 116(sp)\n"
+ "mv a7, a5\n"
+ "lw a5, 112(sp)\n";
+
+ // @CriticalNative with parameters "JIJIJIJIJI".
+ ArgumentLocation move_dests5[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes),
+ ArgumentLocation(FrameOffset(0), kXlenInBytes),
+ ArgumentLocation(FrameOffset(8), kXlenInBytes),
+ };
+ ArgumentLocation move_srcs5[] = {
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize),
+ ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(84), kVRegSize),
+ ArgumentLocation(FrameOffset(88), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(96), kVRegSize),
+ };
+ FrameOffset move_refs5[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests5),
+ ArrayRef<ArgumentLocation>(move_srcs5),
+ ArrayRef<FrameOffset>(move_refs5));
+ expected += "ld t6, 88(sp)\n"
+ "sd t6, 0(sp)\n"
+ "lw t6, 96(sp)\n"
+ "sd t6, 8(sp)\n"
+ "mv a0, a1\n"
+ "mv a1, a2\n"
+ "mv a2, a3\n"
+ "mv a3, a4\n"
+ "mv a4, a5\n"
+ "mv a5, a6\n"
+ "mv a6, a7\n"
+ "lw a7, 84(sp)\n";
+
+ DriverStr(expected, "MoveArguments");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, Move) {
+ std::string expected;
+
+ __ Move(AsManaged(A0), AsManaged(A1), kWordSize);
+ expected += "mv a0, a1\n";
+ __ Move(AsManaged(A2), AsManaged(A3), kDoubleWordSize);
+ expected += "mv a2, a3\n";
+
+ __ Move(AsManaged(A4), AsManaged(A4), kWordSize); // No-op.
+ __ Move(AsManaged(A5), AsManaged(A5), kDoubleWordSize); // No-op.
+
+ DriverStr(expected, "Move");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, GetCurrentThread) {
+ std::string expected;
+
+ __ GetCurrentThread(AsManaged(A0));
+ expected += "mv a0, s1\n";
+
+ __ GetCurrentThread(FrameOffset(256));
+ expected += "sd s1, 256(sp)\n";
+ __ GetCurrentThread(FrameOffset(3 * KB));
+ expected += "addi t6, sp, 0x7f8\n"
+ "sd s1, 0x408(t6)\n";
+
+ DriverStr(expected, "GetCurrentThread");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, DecodeJNITransitionOrLocalJObject) {
+ std::string expected;
+
+ constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask();
+ constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask();
+
+ std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel();
+ std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel();
+
+ __ DecodeJNITransitionOrLocalJObject(AsManaged(A0), slow_path.get(), resume.get());
+ expected += "beqz a0, 1f\n"
+ "andi t6, a0, " + std::to_string(kGlobalOrWeakGlobalMask) + "\n"
+ "bnez t6, 2f\n"
+ "andi a0, a0, ~" + std::to_string(kIndirectRefKindMask) + "\n"
+ "lw a0, (a0)\n";
+
+ __ Bind(resume.get());
+ expected += "1:\n";
+
+ expected += EmitRet();
+
+ __ Bind(slow_path.get());
+ expected += "2:\n";
+
+ __ Jump(resume.get());
+ expected += "j 1b\n";
+
+ DriverStr(expected, "DecodeJNITransitionOrLocalJObject");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, JumpCodePointer) {
+ std::string expected;
+
+ __ Jump(AsManaged(A0), Offset(24));
+ expected += "ld t6, 24(a0)\n"
+ "jr t6\n";
+
+ __ Jump(AsManaged(S2), Offset(2048));
+ expected += "addi t6, s2, 0x7f8\n"
+ "ld t6, 8(t6)\n"
+ "jr t6\n";
+
+ DriverStr(expected, "JumpCodePointer");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, Call) {
+ std::string expected;
+
+ __ Call(AsManaged(A0), Offset(32));
+ expected += "ld ra, 32(a0)\n"
+ "jalr ra\n";
+
+ __ Call(AsManaged(S2), Offset(2048));
+ expected += "addi t6, s2, 0x7f8\n"
+ "ld ra, 8(t6)\n"
+ "jalr ra\n";
+
+ __ CallFromThread(ThreadOffset64(256));
+ expected += "ld ra, 256(s1)\n"
+ "jalr ra\n";
+
+ __ CallFromThread(ThreadOffset64(3 * KB));
+ expected += "addi t6, s1, 0x7f8\n"
+ "ld ra, 0x408(t6)\n"
+ "jalr ra\n";
+
+ DriverStr(expected, "Call");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, Transitions) {
+ std::string expected;
+
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ static_assert(kRunnableStateValue == 0u);
+ constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>();
+ static_assert(thread_flags_offset.SizeValue() == 0u);
+ constexpr size_t thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock).SizeValue();
+ constexpr size_t thread_mutator_lock_offset =
+ Thread::MutatorLockOffset<kRiscv64PointerSize>().SizeValue();
+
+ std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel();
+ std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel();
+
+ const ManagedRegister raw_scratch_regs[] = { AsManaged(T0), AsManaged(T1) };
+ const ArrayRef<const ManagedRegister> scratch_regs(raw_scratch_regs);
+
+ __ TryToTransitionFromRunnableToNative(slow_path.get(), scratch_regs);
+ expected += "1:\n"
+ "lr.w t0, (s1)\n"
+ "li t1, " + std::to_string(kNativeStateValue) + "\n"
+ "bnez t0, 4f\n"
+ "sc.w.rl t0, t1, (s1)\n"
+ "bnez t0, 1b\n"
+ "addi t6, s1, 0x7f8\n"
+ "sd x0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + "(t6)\n";
+
+ __ TryToTransitionFromNativeToRunnable(slow_path.get(), scratch_regs, AsManaged(A0));
+ expected += "2:\n"
+ "lr.w.aq t0, (s1)\n"
+ "li t1, " + std::to_string(kNativeStateValue) + "\n"
+ "bne t0, t1, 4f\n"
+ "sc.w t0, x0, (s1)\n"
+ "bnez t0, 2b\n"
+ "ld t0, " + std::to_string(thread_mutator_lock_offset) + "(s1)\n"
+ "addi t6, s1, 0x7f8\n"
+ "sd t0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + "(t6)\n";
+
+ __ Bind(resume.get());
+ expected += "3:\n";
+
+ expected += EmitRet();
+
+ __ Bind(slow_path.get());
+ expected += "4:\n";
+
+ __ Jump(resume.get());
+ expected += "j 3b";
+
+ DriverStr(expected, "SuspendCheck");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, SuspendCheck) {
+ std::string expected;
+
+  ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>();
+
+ std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel();
+ std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel();
+
+ __ SuspendCheck(slow_path.get());
+ expected += "lw t6, " + std::to_string(thread_flags_offet.Int32Value()) + "(s1)\n"
+ "andi t6, t6, " + std::to_string(Thread::SuspendOrCheckpointRequestFlags()) + "\n"
+ "bnez t6, 2f\n";
+
+ __ Bind(resume.get());
+ expected += "1:\n";
+
+ expected += EmitRet();
+
+ __ Bind(slow_path.get());
+ expected += "2:\n";
+
+ __ Jump(resume.get());
+ expected += "j 1b";
+
+ DriverStr(expected, "SuspendCheck");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, Exception) {
+ std::string expected;
+
+  ThreadOffset64 exception_offset = Thread::ExceptionOffset<kRiscv64PointerSize>();
+  ThreadOffset64 deliver_offset = QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, pDeliverException);
+
+ std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel();
+
+ __ ExceptionPoll(slow_path.get());
+ expected += "ld t6, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n"
+ "bnez t6, 1f\n";
+
+ expected += EmitRet();
+
+ __ Bind(slow_path.get());
+ expected += "1:\n";
+
+ __ DeliverPendingException();
+ expected += "ld a0, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n"
+ "ld ra, " + std::to_string(deliver_offset.Int32Value()) + "(s1)\n"
+ "jalr ra\n"
+ "unimp\n";
+
+ DriverStr(expected, "Exception");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, JumpLabel) {
+ std::string expected;
+
+ std::unique_ptr<JNIMacroLabel> target = __ CreateLabel();
+ std::unique_ptr<JNIMacroLabel> back = __ CreateLabel();
+
+ __ Jump(target.get());
+ expected += "j 2f\n";
+
+ __ Bind(back.get());
+ expected += "1:\n";
+
+ __ Move(AsManaged(A0), AsManaged(A1), static_cast<size_t>(kRiscv64PointerSize));
+ expected += "mv a0, a1\n";
+
+ __ Bind(target.get());
+ expected += "2:\n";
+
+ __ Jump(back.get());
+ expected += "j 1b\n";
+
+ DriverStr(expected, "JumpLabel");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, ReadBarrier) {
+ std::string expected;
+
+ ThreadOffset64 is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>();
+ MemberOffset monitor_offset = mirror::Object::MonitorOffset();
+
+ std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel();
+ std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel();
+
+ __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kNotZero);
+ expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n"
+ "bnez t6, 2f\n";
+
+ __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kZero);
+ expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n"
+ "beqz t6, 2f\n";
+
+ __ Bind(resume.get());
+ expected += "1:\n";
+
+ expected += EmitRet();
+
+ __ Bind(slow_path.get());
+ expected += "2:\n";
+
+ __ TestMarkBit(AsManaged(A0), resume.get(), JNIMacroUnaryCondition::kNotZero);
+ expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(a0)\n"
+ "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n"
+ "bltz t6, 1b\n";
+
+ __ TestMarkBit(AsManaged(T0), resume.get(), JNIMacroUnaryCondition::kZero);
+ expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(t0)\n"
+ "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n"
+ "bgez t6, 1b\n";
+
+ DriverStr(expected, "ReadBarrier");
+}
+
+TEST_F(JniMacroAssemblerRiscv64Test, TestByteAndJumpIfNotZero) {
+  // Note: `TestByteAndJumpIfNotZero()` takes the address as a `uintptr_t`.
+  // Use 32-bit addresses so that this test can also run as part of 32-bit host tests.
+
+ std::string expected;
+
+ std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel();
+ std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel();
+
+ __ TestByteAndJumpIfNotZero(0x12345678u, slow_path.get());
+ expected += "lui t6, 0x12345\n"
+ "lb t6, 0x678(t6)\n"
+ "bnez t6, 2f\n";
+
+ __ TestByteAndJumpIfNotZero(0x87654321u, slow_path.get());
+ expected += "lui t6, 0x87654/4\n"
+ "slli t6, t6, 2\n"
+ "lb t6, 0x321(t6)\n"
+ "bnez t6, 2f\n";
+
+ __ Bind(resume.get());
+ expected += "1:\n";
+
+ expected += EmitRet();
+
+ __ Bind(slow_path.get());
+ expected += "2:\n";
+
+ __ TestByteAndJumpIfNotZero(0x456789abu, resume.get());
+ expected += "lui t6, 0x45678+1\n"
+ "lb t6, 0x9ab-0x1000(t6)\n"
+ "bnez t6, 1b\n";
+
+ DriverStr(expected, "TestByteAndJumpIfNotZero");
+}
+
+#undef __
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/utils/riscv64/managed_register_riscv64.cc b/compiler/utils/riscv64/managed_register_riscv64.cc
index 560019ae09..99bd4be784 100644
--- a/compiler/utils/riscv64/managed_register_riscv64.cc
+++ b/compiler/utils/riscv64/managed_register_riscv64.cc
@@ -18,7 +18,7 @@
#include "base/globals.h"
-namespace art {
+namespace art HIDDEN {
namespace riscv64 {
bool Riscv64ManagedRegister::Overlaps(const Riscv64ManagedRegister& other) const {
diff --git a/compiler/utils/riscv64/managed_register_riscv64.h b/compiler/utils/riscv64/managed_register_riscv64.h
index 8e02a9dcc8..622d766945 100644
--- a/compiler/utils/riscv64/managed_register_riscv64.h
+++ b/compiler/utils/riscv64/managed_register_riscv64.h
@@ -24,7 +24,7 @@
#include "base/macros.h"
#include "utils/managed_register.h"
-namespace art {
+namespace art HIDDEN {
namespace riscv64 {
const int kNumberOfXRegIds = kNumberOfXRegisters;
diff --git a/compiler/utils/riscv64/managed_register_riscv64_test.cc b/compiler/utils/riscv64/managed_register_riscv64_test.cc
index c6ad2dc38a..d7012a796a 100644
--- a/compiler/utils/riscv64/managed_register_riscv64_test.cc
+++ b/compiler/utils/riscv64/managed_register_riscv64_test.cc
@@ -19,7 +19,7 @@
#include "base/globals.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
namespace riscv64 {
TEST(Riscv64ManagedRegister, NoRegister) {
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
index d0fff73df3..1be4532f3e 100644
--- a/compiler/utils/stack_checks.h
+++ b/compiler/utils/stack_checks.h
@@ -35,7 +35,7 @@ static constexpr size_t kSmallFrameSize = 1 * KB;
// stack overflow check on method entry.
//
// A frame is considered large when it's above kLargeFrameSize.
-static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa ATTRIBUTE_UNUSED) {
+static inline bool FrameNeedsStackCheck(size_t size, [[maybe_unused]] InstructionSet isa) {
return size >= kLargeFrameSize;
}
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 0f7854dc5c..e6503045fa 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -955,6 +955,12 @@ class X86Assembler final : public Assembler {
lock()->xaddl(address, reg);
}
+ void rdtsc() {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x31);
+ }
+
//
// Misc. functionality
//
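The two bytes emitted by the new `rdtsc()` are the RDTSC encoding (0F 31), which reads the time-stamp counter into EDX:EAX. A test in the style of the fixtures that follow might look like this; it is a sketch rather than part of this change, and it assumes the harness's `GetAssembler()` and `DriverStr()` helpers:

TEST_F(AssemblerX86Test, Rdtsc) {
  GetAssembler()->rdtsc();           // emits 0F 31
  const char* expected = "rdtsc\n";  // what the reference assembler should print
  DriverStr(expected, "rdtsc");
}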
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 5da6f04402..432322aea7 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -89,19 +89,7 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler,
addresses_.push_back(x86::Address(x86::ESP, 987654321));
}
- if (registers_.size() == 0) {
- registers_.insert(end(registers_),
- {
- new x86::Register(x86::EAX),
- new x86::Register(x86::EBX),
- new x86::Register(x86::ECX),
- new x86::Register(x86::EDX),
- new x86::Register(x86::EBP),
- new x86::Register(x86::ESP),
- new x86::Register(x86::ESI),
- new x86::Register(x86::EDI)
- });
-
+ if (secondary_register_names_.empty()) {
secondary_register_names_.emplace(x86::Register(x86::EAX), "ax");
secondary_register_names_.emplace(x86::Register(x86::EBX), "bx");
secondary_register_names_.emplace(x86::Register(x86::ECX), "cx");
@@ -121,38 +109,28 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler,
tertiary_register_names_.emplace(x86::Register(x86::ESI), "dh");
tertiary_register_names_.emplace(x86::Register(x86::EDI), "bh");
}
-
- if (fp_registers_.size() == 0) {
- fp_registers_.insert(end(fp_registers_),
- {
- new x86::XmmRegister(x86::XMM0),
- new x86::XmmRegister(x86::XMM1),
- new x86::XmmRegister(x86::XMM2),
- new x86::XmmRegister(x86::XMM3),
- new x86::XmmRegister(x86::XMM4),
- new x86::XmmRegister(x86::XMM5),
- new x86::XmmRegister(x86::XMM6),
- new x86::XmmRegister(x86::XMM7)
- });
- }
}
void TearDown() override {
AssemblerTest::TearDown();
- STLDeleteElements(&registers_);
- STLDeleteElements(&fp_registers_);
}
std::vector<x86::Address> GetAddresses() override {
return addresses_;
}
- std::vector<x86::Register*> GetRegisters() override {
- return registers_;
+ ArrayRef<const x86::Register> GetRegisters() override {
+ static constexpr x86::Register kRegisters[] = {
+ x86::EAX, x86::EBX, x86::ECX, x86::EDX, x86::EBP, x86::ESP, x86::ESI, x86::EDI
+ };
+ return ArrayRef<const x86::Register>(kRegisters);
}
- std::vector<x86::XmmRegister*> GetFPRegisters() override {
- return fp_registers_;
+ ArrayRef<const x86::XmmRegister> GetFPRegisters() override {
+ static constexpr x86::XmmRegister kFPRegisters[] = {
+ x86::XMM0, x86::XMM1, x86::XMM2, x86::XMM3, x86::XMM4, x86::XMM5, x86::XMM6, x86::XMM7
+ };
+ return ArrayRef<const x86::XmmRegister>(kFPRegisters);
}
x86::Immediate CreateImmediate(int64_t imm_value) override {
@@ -173,10 +151,8 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler,
private:
std::vector<x86::Address> addresses_;
- std::vector<x86::Register*> registers_;
std::map<x86::Register, std::string, X86RegisterCompare> secondary_register_names_;
std::map<x86::Register, std::string, X86RegisterCompare> tertiary_register_names_;
- std::vector<x86::XmmRegister*> fp_registers_;
};
class AssemblerX86AVXTest : public AssemblerX86Test {
@@ -267,28 +243,28 @@ TEST_F(AssemblerX86Test, RepeatAF) {
TEST_F(AssemblerX86Test, PoplAllAddresses) {
// Make sure all addressing modes combinations are tested at least once.
std::vector<x86::Address> all_addresses;
- for (x86::Register* base : GetRegisters()) {
+ for (x86::Register base : GetRegisters()) {
// Base only.
- all_addresses.push_back(x86::Address(*base, -1));
- all_addresses.push_back(x86::Address(*base, 0));
- all_addresses.push_back(x86::Address(*base, 1));
- all_addresses.push_back(x86::Address(*base, 123456789));
- for (x86::Register* index : GetRegisters()) {
- if (*index == x86::ESP) {
+ all_addresses.push_back(x86::Address(base, -1));
+ all_addresses.push_back(x86::Address(base, 0));
+ all_addresses.push_back(x86::Address(base, 1));
+ all_addresses.push_back(x86::Address(base, 123456789));
+ for (x86::Register index : GetRegisters()) {
+ if (index == x86::ESP) {
// Index cannot be ESP.
continue;
- } else if (*base == *index) {
+ } else if (base == index) {
// Index only.
- all_addresses.push_back(x86::Address(*index, TIMES_1, -1));
- all_addresses.push_back(x86::Address(*index, TIMES_2, 0));
- all_addresses.push_back(x86::Address(*index, TIMES_4, 1));
- all_addresses.push_back(x86::Address(*index, TIMES_8, 123456789));
+ all_addresses.push_back(x86::Address(index, TIMES_1, -1));
+ all_addresses.push_back(x86::Address(index, TIMES_2, 0));
+ all_addresses.push_back(x86::Address(index, TIMES_4, 1));
+ all_addresses.push_back(x86::Address(index, TIMES_8, 123456789));
}
// Base and index.
- all_addresses.push_back(x86::Address(*base, *index, TIMES_1, -1));
- all_addresses.push_back(x86::Address(*base, *index, TIMES_2, 0));
- all_addresses.push_back(x86::Address(*base, *index, TIMES_4, 1));
- all_addresses.push_back(x86::Address(*base, *index, TIMES_8, 123456789));
+ all_addresses.push_back(x86::Address(base, index, TIMES_1, -1));
+ all_addresses.push_back(x86::Address(base, index, TIMES_2, 0));
+ all_addresses.push_back(x86::Address(base, index, TIMES_4, 1));
+ all_addresses.push_back(x86::Address(base, index, TIMES_8, 123456789));
}
}
DriverStr(RepeatA(&x86::X86Assembler::popl, all_addresses, "popl {mem}"), "popq");
@@ -510,11 +486,11 @@ TEST_F(AssemblerX86Test, PopcntlAddress) {
// Rorl only allows CL as the shift count.
std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
std::ostringstream str;
- std::vector<x86::Register*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86::Register> registers = assembler_test->GetRegisters();
x86::Register shifter(x86::ECX);
- for (auto reg : registers) {
- assembler->rorl(*reg, shifter);
- str << "rorl %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->rorl(reg, shifter);
+ str << "rorl %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -530,11 +506,11 @@ TEST_F(AssemblerX86Test, RorlImm) {
// Roll only allows CL as the shift count.
std::string roll_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
std::ostringstream str;
- std::vector<x86::Register*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86::Register> registers = assembler_test->GetRegisters();
x86::Register shifter(x86::ECX);
- for (auto reg : registers) {
- assembler->roll(*reg, shifter);
- str << "roll %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->roll(reg, shifter);
+ str << "roll %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -1379,27 +1355,27 @@ TEST_F(AssemblerX86Test, AddressDisplaceBy) {
for (int32_t disp0 : displacements) { // initial displacement
for (int32_t disp : displacements) { // extra displacement
- for (const x86::Register *reg : GetRegisters()) {
+ for (x86::Register reg : GetRegisters()) {
// Test non-SIB addressing.
- EXPECT_EQ(x86::Address::displace(x86::Address(*reg, disp0), disp),
- x86::Address(*reg, disp0 + disp));
+ EXPECT_EQ(x86::Address::displace(x86::Address(reg, disp0), disp),
+ x86::Address(reg, disp0 + disp));
// Test SIB addressing with EBP base.
- if (*reg != x86::ESP) {
+ if (reg != x86::ESP) {
for (ScaleFactor scale : scales) {
- EXPECT_EQ(x86::Address::displace(x86::Address(*reg, scale, disp0), disp),
- x86::Address(*reg, scale, disp0 + disp));
+ EXPECT_EQ(x86::Address::displace(x86::Address(reg, scale, disp0), disp),
+ x86::Address(reg, scale, disp0 + disp));
}
}
// Test SIB addressing with different base.
- for (const x86::Register *index : GetRegisters()) {
- if (*index == x86::ESP) {
+ for (x86::Register index : GetRegisters()) {
+ if (index == x86::ESP) {
continue; // Skip ESP as it cannot be used with this address constructor.
}
for (ScaleFactor scale : scales) {
- EXPECT_EQ(x86::Address::displace(x86::Address(*reg, *index, scale, disp0), disp),
- x86::Address(*reg, *index, scale, disp0 + disp));
+ EXPECT_EQ(x86::Address::displace(x86::Address(reg, index, scale, disp0), disp),
+ x86::Address(reg, index, scale, disp0 + disp));
}
}
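The fixture change above, like the matching x86-64 change below, replaces heap-allocated register vectors (freed with STLDeleteElements in TearDown) with non-owning views over static constexpr arrays, so callers iterate registers by value and nothing is left to delete. A condensed sketch of the idea with stand-in names (`View` approximates art's ArrayRef; the register enum and function are illustrative):

#include <cstddef>

// Stand-in for art's ArrayRef: a non-owning (pointer, length) view.
template <typename T>
class View {
 public:
  template <std::size_t N>
  explicit constexpr View(const T (&array)[N]) : data_(array), size_(N) {}
  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }
 private:
  const T* data_;
  std::size_t size_;
};

enum Reg { kEAX, kEBX, kECX };

// Static constexpr storage outlives every caller, so returning a view is safe
// and a TearDown() override has nothing to delete.
static View<const Reg> GetTestRegisters() {
  static constexpr Reg kRegisters[] = {kEAX, kEBX, kECX};
  return View<const Reg>(kRegisters);
}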
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 154e50b4e4..dfdbc183f1 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -83,7 +83,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
ArrayRef<const ManagedRegister> spill_regs,
- bool may_suspend ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] bool may_suspend) {
CHECK_ALIGNED(frame_size, kNativeStackAlignment);
cfi().RememberState();
// -kFramePointerSize for ArtMethod*.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 3fdf05bed9..e2a32a7337 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -5244,6 +5244,12 @@ void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
EmitOperand(dst.LowBits(), src);
}
+void X86_64Assembler::rdtsc() {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x31);
+}
+
void X86_64Assembler::repne_scasb() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 235ea03e2b..cb62500bc9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -964,6 +964,8 @@ class X86_64Assembler final : public Assembler {
void popcntq(CpuRegister dst, CpuRegister src);
void popcntq(CpuRegister dst, const Address& src);
+ void rdtsc();
+
void rorl(CpuRegister reg, const Immediate& imm);
void rorl(CpuRegister operand, CpuRegister shifter);
void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index a7c206afaa..e985441101 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -199,24 +199,7 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler,
addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::R15), 123456789));
}
- if (registers_.size() == 0) {
- registers_.push_back(new x86_64::CpuRegister(x86_64::RAX));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RBX));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RCX));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RDX));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RBP));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RSP));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RSI));
- registers_.push_back(new x86_64::CpuRegister(x86_64::RDI));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R8));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R9));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R10));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R11));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R12));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R13));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R14));
- registers_.push_back(new x86_64::CpuRegister(x86_64::R15));
-
+ if (secondary_register_names_.empty()) {
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "eax");
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "ebx");
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "ecx");
@@ -267,42 +250,59 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler,
quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b");
quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b");
quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b");
-
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM3));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM4));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM5));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM6));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM7));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM8));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM9));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM10));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM11));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM12));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM13));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM14));
- fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM15));
}
}
void TearDown() override {
AssemblerTest::TearDown();
- STLDeleteElements(&registers_);
- STLDeleteElements(&fp_registers_);
}
std::vector<x86_64::Address> GetAddresses() override {
return addresses_;
}
- std::vector<x86_64::CpuRegister*> GetRegisters() override {
- return registers_;
+ ArrayRef<const x86_64::CpuRegister> GetRegisters() override {
+ static constexpr x86_64::CpuRegister kRegisters[] = {
+ x86_64::CpuRegister(x86_64::RAX),
+ x86_64::CpuRegister(x86_64::RBX),
+ x86_64::CpuRegister(x86_64::RCX),
+ x86_64::CpuRegister(x86_64::RDX),
+ x86_64::CpuRegister(x86_64::RBP),
+ x86_64::CpuRegister(x86_64::RSP),
+ x86_64::CpuRegister(x86_64::RSI),
+ x86_64::CpuRegister(x86_64::RDI),
+ x86_64::CpuRegister(x86_64::R8),
+ x86_64::CpuRegister(x86_64::R9),
+ x86_64::CpuRegister(x86_64::R10),
+ x86_64::CpuRegister(x86_64::R11),
+ x86_64::CpuRegister(x86_64::R12),
+ x86_64::CpuRegister(x86_64::R13),
+ x86_64::CpuRegister(x86_64::R14),
+ x86_64::CpuRegister(x86_64::R15),
+ };
+ return ArrayRef<const x86_64::CpuRegister>(kRegisters);
}
- std::vector<x86_64::XmmRegister*> GetFPRegisters() override {
- return fp_registers_;
+ ArrayRef<const x86_64::XmmRegister> GetFPRegisters() override {
+ static constexpr x86_64::XmmRegister kFPRegisters[] = {
+ x86_64::XmmRegister(x86_64::XMM0),
+ x86_64::XmmRegister(x86_64::XMM1),
+ x86_64::XmmRegister(x86_64::XMM2),
+ x86_64::XmmRegister(x86_64::XMM3),
+ x86_64::XmmRegister(x86_64::XMM4),
+ x86_64::XmmRegister(x86_64::XMM5),
+ x86_64::XmmRegister(x86_64::XMM6),
+ x86_64::XmmRegister(x86_64::XMM7),
+ x86_64::XmmRegister(x86_64::XMM8),
+ x86_64::XmmRegister(x86_64::XMM9),
+ x86_64::XmmRegister(x86_64::XMM10),
+ x86_64::XmmRegister(x86_64::XMM11),
+ x86_64::XmmRegister(x86_64::XMM12),
+ x86_64::XmmRegister(x86_64::XMM13),
+ x86_64::XmmRegister(x86_64::XMM14),
+ x86_64::XmmRegister(x86_64::XMM15),
+ };
+ return ArrayRef<const x86_64::XmmRegister>(kFPRegisters);
}
x86_64::Immediate CreateImmediate(int64_t imm_value) override {
@@ -328,11 +328,9 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler,
private:
std::vector<x86_64::Address> addresses_;
- std::vector<x86_64::CpuRegister*> registers_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_;
- std::vector<x86_64::XmmRegister*> fp_registers_;
};
class AssemblerX86_64AVXTest : public AssemblerX86_64Test {
@@ -515,28 +513,28 @@ TEST_F(AssemblerX86_64Test, Toolchain) {
TEST_F(AssemblerX86_64Test, PopqAllAddresses) {
// Make sure all addressing modes combinations are tested at least once.
std::vector<x86_64::Address> all_addresses;
- for (x86_64::CpuRegister* base : GetRegisters()) {
+ for (const x86_64::CpuRegister& base : GetRegisters()) {
// Base only.
- all_addresses.push_back(x86_64::Address(*base, -1));
- all_addresses.push_back(x86_64::Address(*base, 0));
- all_addresses.push_back(x86_64::Address(*base, 1));
- all_addresses.push_back(x86_64::Address(*base, 123456789));
- for (x86_64::CpuRegister* index : GetRegisters()) {
- if (index->AsRegister() == x86_64::RSP) {
+ all_addresses.push_back(x86_64::Address(base, -1));
+ all_addresses.push_back(x86_64::Address(base, 0));
+ all_addresses.push_back(x86_64::Address(base, 1));
+ all_addresses.push_back(x86_64::Address(base, 123456789));
+ for (const x86_64::CpuRegister& index : GetRegisters()) {
+ if (index.AsRegister() == x86_64::RSP) {
// Index cannot be RSP.
continue;
- } else if (base->AsRegister() == index->AsRegister()) {
+ } else if (base.AsRegister() == index.AsRegister()) {
// Index only.
- all_addresses.push_back(x86_64::Address(*index, TIMES_1, -1));
- all_addresses.push_back(x86_64::Address(*index, TIMES_2, 0));
- all_addresses.push_back(x86_64::Address(*index, TIMES_4, 1));
- all_addresses.push_back(x86_64::Address(*index, TIMES_8, 123456789));
+ all_addresses.push_back(x86_64::Address(index, TIMES_1, -1));
+ all_addresses.push_back(x86_64::Address(index, TIMES_2, 0));
+ all_addresses.push_back(x86_64::Address(index, TIMES_4, 1));
+ all_addresses.push_back(x86_64::Address(index, TIMES_8, 123456789));
}
// Base and index.
- all_addresses.push_back(x86_64::Address(*base, *index, TIMES_1, -1));
- all_addresses.push_back(x86_64::Address(*base, *index, TIMES_2, 0));
- all_addresses.push_back(x86_64::Address(*base, *index, TIMES_4, 1));
- all_addresses.push_back(x86_64::Address(*base, *index, TIMES_8, 123456789));
+ all_addresses.push_back(x86_64::Address(base, index, TIMES_1, -1));
+ all_addresses.push_back(x86_64::Address(base, index, TIMES_2, 0));
+ all_addresses.push_back(x86_64::Address(base, index, TIMES_4, 1));
+ all_addresses.push_back(x86_64::Address(base, index, TIMES_8, 123456789));
}
}
DriverStr(RepeatA(&x86_64::X86_64Assembler::popq, all_addresses, "popq {mem}"), "popq");
@@ -641,11 +639,11 @@ TEST_F(AssemblerX86_64Test, SublImm) {
// Shll only allows CL as the shift count.
std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->shll(*reg, shifter);
- str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->shll(reg, shifter);
+ str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n";
}
return str.str();
}
@@ -662,11 +660,11 @@ TEST_F(AssemblerX86_64Test, ShllImm) {
// Shlq only allows CL as the shift count.
std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->shlq(*reg, shifter);
- str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->shlq(reg, shifter);
+ str << "shlq %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -683,11 +681,11 @@ TEST_F(AssemblerX86_64Test, ShlqImm) {
// Shrl only allows CL as the shift count.
std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->shrl(*reg, shifter);
- str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->shrl(reg, shifter);
+ str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n";
}
return str.str();
}
@@ -703,11 +701,11 @@ TEST_F(AssemblerX86_64Test, ShrlImm) {
// Shrq only allows CL as the shift count.
std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->shrq(*reg, shifter);
- str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->shrq(reg, shifter);
+ str << "shrq %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -723,11 +721,11 @@ TEST_F(AssemblerX86_64Test, ShrqImm) {
// Sarl only allows CL as the shift count.
std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->sarl(*reg, shifter);
- str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->sarl(reg, shifter);
+ str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n";
}
return str.str();
}
@@ -743,11 +741,11 @@ TEST_F(AssemblerX86_64Test, SarlImm) {
// Sarq only allows CL as the shift count.
std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->sarq(*reg, shifter);
- str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->sarq(reg, shifter);
+ str << "sarq %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -763,11 +761,11 @@ TEST_F(AssemblerX86_64Test, SarqImm) {
// Rorl only allows CL as the shift count.
std::string rorl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->rorl(*reg, shifter);
- str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->rorl(reg, shifter);
+ str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n";
}
return str.str();
}
@@ -783,11 +781,11 @@ TEST_F(AssemblerX86_64Test, RorlImm) {
// Roll only allows CL as the shift count.
std::string roll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->roll(*reg, shifter);
- str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->roll(reg, shifter);
+ str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n";
}
return str.str();
}
@@ -803,11 +801,11 @@ TEST_F(AssemblerX86_64Test, RollImm) {
// Rorq only allows CL as the shift count.
std::string rorq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->rorq(*reg, shifter);
- str << "rorq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->rorq(reg, shifter);
+ str << "rorq %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -823,11 +821,11 @@ TEST_F(AssemblerX86_64Test, RorqImm) {
// Rolq only allows CL as the shift count.
std::string rolq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
x86_64::CpuRegister shifter(x86_64::RCX);
- for (auto reg : registers) {
- assembler->rolq(*reg, shifter);
- str << "rolq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+ for (auto&& reg : registers) {
+ assembler->rolq(reg, shifter);
+ str << "rolq %cl, %" << assembler_test->GetRegisterName(reg) << "\n";
}
return str.str();
}
@@ -2135,7 +2133,7 @@ TEST_F(AssemblerX86_64Test, Psrldq) {
"psrldq $2, %xmm15\n", "psrldqi");
}
-std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+std::string x87_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test,
x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
@@ -2202,7 +2200,7 @@ TEST_F(AssemblerX86_64Test, RetImm) {
"ret ${imm}", /*non-negative*/ true), "ret");
}
-std::string ret_and_leave_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+std::string ret_and_leave_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test,
x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
@@ -2375,13 +2373,13 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
std::string suffixes[15] = { "o", "no", "b", "ae", "e", "ne", "be", "a", "s", "ns", "pe", "po",
"l", "ge", "le" };
- std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+ ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters();
std::ostringstream str;
- for (auto reg : registers) {
+ for (auto&& reg : registers) {
for (size_t i = 0; i < 15; ++i) {
- assembler->setcc(static_cast<x86_64::Condition>(i), *reg);
- str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n";
+ assembler->setcc(static_cast<x86_64::Condition>(i), reg);
+ str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(reg) << "\n";
}
}
@@ -2459,27 +2457,27 @@ TEST_F(AssemblerX86_64Test, AddressDisplaceBy) {
for (int32_t disp0 : displacements) { // initial displacement
for (int32_t disp : displacements) { // extra displacement
- for (const x86_64::CpuRegister* reg : GetRegisters()) {
+ for (const x86_64::CpuRegister reg : GetRegisters()) {
// Test non-SIB addressing.
- EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, disp0), disp),
- x86_64::Address(*reg, disp0 + disp));
+ EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, disp0), disp),
+ x86_64::Address(reg, disp0 + disp));
// Test SIB addressing with RBP base.
- if (reg->AsRegister() != x86_64::RSP) {
+ if (reg.AsRegister() != x86_64::RSP) {
for (ScaleFactor scale : scales) {
- EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, scale, disp0), disp),
- x86_64::Address(*reg, scale, disp0 + disp));
+ EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, scale, disp0), disp),
+ x86_64::Address(reg, scale, disp0 + disp));
}
}
// Test SIB addressing with different base.
- for (const x86_64::CpuRegister* index : GetRegisters()) {
- if (index->AsRegister() == x86_64::RSP) {
+ for (const x86_64::CpuRegister& index : GetRegisters()) {
+ if (index.AsRegister() == x86_64::RSP) {
continue; // Skip RSP as it cannot be used with this address constructor.
}
for (ScaleFactor scale : scales) {
- EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, *index, scale, disp0), disp),
- x86_64::Address(*reg, *index, scale, disp0 + disp));
+ EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, index, scale, disp0), disp),
+ x86_64::Address(reg, index, scale, disp0 + disp));
}
}
@@ -2513,7 +2511,7 @@ static x86_64::X86_64ManagedRegister ManagedFromFpu(x86_64::FloatRegister r) {
return x86_64::X86_64ManagedRegister::FromXmmRegister(r);
}
-std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+std::string buildframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test,
x86_64::X86_64JNIMacroAssembler* assembler) {
// TODO: more interesting spill registers / entry spills.
@@ -2556,7 +2554,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) {
DriverFn(&buildframe_test_fn, "BuildFrame");
}
-std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+std::string removeframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test,
x86_64::X86_64JNIMacroAssembler* assembler) {
// TODO: more interesting spill registers / entry spills.
@@ -2588,7 +2586,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) {
}
std::string increaseframe_test_fn(
- JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+ [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test,
x86_64::X86_64JNIMacroAssembler* assembler) {
assembler->IncreaseFrameSize(0U);
assembler->IncreaseFrameSize(kStackAlignment);
@@ -2608,7 +2606,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) {
}
std::string decreaseframe_test_fn(
- JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+ [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test,
x86_64::X86_64JNIMacroAssembler* assembler) {
assembler->DecreaseFrameSize(0U);
assembler->DecreaseFrameSize(kStackAlignment);
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 388845730e..e9e6dbdae7 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -95,7 +95,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
ArrayRef<const ManagedRegister> spill_regs,
- bool may_suspend ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] bool may_suspend) {
CHECK_ALIGNED(frame_size, kNativeStackAlignment);
cfi().RememberState();
int gpr_count = 0;
@@ -515,7 +515,7 @@ void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
}
void X86_64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
- JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) {
+ JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) {
constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kX86_64PointerSize>();