Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp  41
-rw-r--r--  compiler/art_standalone_compiler_tests.xml  3
-rw-r--r--  compiler/cfi_test.h  3
-rw-r--r--  compiler/common_compiler_test.cc  157
-rw-r--r--  compiler/common_compiler_test.h  22
-rw-r--r--  compiler/compiled_method-inl.h  55
-rw-r--r--  compiler/compiled_method.cc  141
-rw-r--r--  compiler/compiled_method.h  167
-rw-r--r--  compiler/compiler.cc  4
-rw-r--r--  compiler/compiler.h  17
-rw-r--r--  compiler/compiler_reflection_test.cc  59
-rw-r--r--  compiler/debug/debug_info.h  3
-rw-r--r--  compiler/debug/dwarf/dwarf_test.cc  2
-rw-r--r--  compiler/debug/dwarf/dwarf_test.h  3
-rw-r--r--  compiler/debug/elf_compilation_unit.h  3
-rw-r--r--  compiler/debug/elf_debug_frame_writer.h  7
-rw-r--r--  compiler/debug/elf_debug_info_writer.h  9
-rw-r--r--  compiler/debug/elf_debug_line_writer.h  4
-rw-r--r--  compiler/debug/elf_debug_loc_writer.h  8
-rw-r--r--  compiler/debug/elf_debug_writer.cc  5
-rw-r--r--  compiler/debug/elf_debug_writer.h  6
-rw-r--r--  compiler/debug/elf_symtab_writer.h  5
-rw-r--r--  compiler/debug/method_debug_info.h  3
-rw-r--r--  compiler/debug/src_map_elem.h  4
-rw-r--r--  compiler/debug/src_map_elem_test.cc  2
-rw-r--r--  compiler/dex/inline_method_analyser.cc  2
-rw-r--r--  compiler/dex/inline_method_analyser.h  2
-rw-r--r--  compiler/dex/verification_results.cc  64
-rw-r--r--  compiler/dex/verification_results.h  60
-rw-r--r--  compiler/driver/compiled_code_storage.h  73
-rw-r--r--  compiler/driver/compiled_method_storage.cc  288
-rw-r--r--  compiler/driver/compiled_method_storage.h  135
-rw-r--r--  compiler/driver/compiled_method_storage_test.cc  101
-rw-r--r--  compiler/driver/compiler_options.cc  33
-rw-r--r--  compiler/driver/compiler_options.h  37
-rw-r--r--  compiler/driver/compiler_options_map-inl.h  4
-rw-r--r--  compiler/driver/compiler_options_map.h  3
-rw-r--r--  compiler/driver/dex_compilation_unit.cc  2
-rw-r--r--  compiler/driver/dex_compilation_unit.h  3
-rw-r--r--  compiler/driver/simple_compiler_options_map.h  3
-rw-r--r--  compiler/exception_test.cc  31
-rw-r--r--  compiler/jit/jit_compiler.cc  12
-rw-r--r--  compiler/jit/jit_compiler.h  5
-rw-r--r--  compiler/jit/jit_logger.cc  2
-rw-r--r--  compiler/jit/jit_logger.h  4
-rw-r--r--  compiler/jni/jni_cfi_test.cc  24
-rw-r--r--  compiler/jni/jni_compiler_test.cc  35
-rw-r--r--  compiler/jni/quick/arm/calling_convention_arm.cc  6
-rw-r--r--  compiler/jni/quick/arm/calling_convention_arm.h  4
-rw-r--r--  compiler/jni/quick/arm64/calling_convention_arm64.cc  6
-rw-r--r--  compiler/jni/quick/arm64/calling_convention_arm64.h  4
-rw-r--r--  compiler/jni/quick/calling_convention.cc  12
-rw-r--r--  compiler/jni/quick/calling_convention.h  8
-rw-r--r--  compiler/jni/quick/jni_compiler.cc  190
-rw-r--r--  compiler/jni/quick/jni_compiler.h  3
-rw-r--r--  compiler/jni/quick/x86/calling_convention_x86.cc  6
-rw-r--r--  compiler/jni/quick/x86/calling_convention_x86.h  4
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.cc  6
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.h  4
-rw-r--r--  compiler/libart-compiler.map  34
-rw-r--r--  compiler/linker/linker_patch.h  5
-rw-r--r--  compiler/linker/linker_patch_test.cc  3
-rw-r--r--  compiler/linker/output_stream_test.cc  6
-rw-r--r--  compiler/optimizing/block_builder.cc  2
-rw-r--r--  compiler/optimizing/block_builder.h  3
-rw-r--r--  compiler/optimizing/block_namer.cc  2
-rw-r--r--  compiler/optimizing/block_namer.h  4
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc  20
-rw-r--r--  compiler/optimizing/bounds_check_elimination.h  3
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc  3
-rw-r--r--  compiler/optimizing/builder.cc  4
-rw-r--r--  compiler/optimizing/builder.h  3
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc  3
-rw-r--r--  compiler/optimizing/cha_guard_optimization.h  3
-rw-r--r--  compiler/optimizing/code_generator.cc  175
-rw-r--r--  compiler/optimizing/code_generator.h  44
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  260
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  52
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  289
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  74
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h  23
-rw-r--r--  compiler/optimizing/code_generator_utils.cc  2
-rw-r--r--  compiler/optimizing/code_generator_utils.h  4
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_neon.cc  12
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_sve.cc  15
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc  6
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc  12
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc  12
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  252
-rw-r--r--  compiler/optimizing/code_generator_x86.h  75
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  226
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  58
-rw-r--r--  compiler/optimizing/code_sinking.cc  282
-rw-r--r--  compiler/optimizing/code_sinking.h  11
-rw-r--r--  compiler/optimizing/codegen_test.cc  4
-rw-r--r--  compiler/optimizing/codegen_test_utils.h  23
-rw-r--r--  compiler/optimizing/common_arm.h  3
-rw-r--r--  compiler/optimizing/common_arm64.h  11
-rw-r--r--  compiler/optimizing/common_dominator.h  3
-rw-r--r--  compiler/optimizing/constant_folding.cc  318
-rw-r--r--  compiler/optimizing/constant_folding.h  13
-rw-r--r--  compiler/optimizing/constant_folding_test.cc  10
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.cc  14
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.h  3
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_arm.cc  11
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_arm.h  3
-rw-r--r--  compiler/optimizing/data_type-inl.h  2
-rw-r--r--  compiler/optimizing/data_type.cc  2
-rw-r--r--  compiler/optimizing/data_type.h  3
-rw-r--r--  compiler/optimizing/data_type_test.cc  2
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc  527
-rw-r--r--  compiler/optimizing/dead_code_elimination.h  81
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc  5
-rw-r--r--  compiler/optimizing/dominator_test.cc  5
-rw-r--r--  compiler/optimizing/escape.cc  2
-rw-r--r--  compiler/optimizing/escape.h  4
-rw-r--r--  compiler/optimizing/execution_subgraph.cc  2
-rw-r--r--  compiler/optimizing/execution_subgraph.h  3
-rw-r--r--  compiler/optimizing/execution_subgraph_test.cc  2
-rw-r--r--  compiler/optimizing/execution_subgraph_test.h  4
-rw-r--r--  compiler/optimizing/find_loops_test.cc  5
-rw-r--r--  compiler/optimizing/graph_checker.cc  269
-rw-r--r--  compiler/optimizing/graph_checker.h  36
-rw-r--r--  compiler/optimizing/graph_checker_test.cc  5
-rw-r--r--  compiler/optimizing/graph_test.cc  3
-rw-r--r--  compiler/optimizing/graph_visualizer.cc  30
-rw-r--r--  compiler/optimizing/graph_visualizer.h  3
-rw-r--r--  compiler/optimizing/gvn.cc  2
-rw-r--r--  compiler/optimizing/gvn.h  3
-rw-r--r--  compiler/optimizing/gvn_test.cc  3
-rw-r--r--  compiler/optimizing/induction_var_analysis.cc  98
-rw-r--r--  compiler/optimizing/induction_var_analysis.h  16
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc  3
-rw-r--r--  compiler/optimizing/induction_var_range.cc  56
-rw-r--r--  compiler/optimizing/induction_var_range.h  12
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc  7
-rw-r--r--  compiler/optimizing/inliner.cc  239
-rw-r--r--  compiler/optimizing/inliner.h  41
-rw-r--r--  compiler/optimizing/instruction_builder.cc  54
-rw-r--r--  compiler/optimizing/instruction_builder.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc  121
-rw-r--r--  compiler/optimizing/instruction_simplifier.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc  4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc  4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc  4
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_test.cc  63
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86.cc  4
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_64.cc  4
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_64.h  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_shared.cc  3
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_shared.h  5
-rw-r--r--  compiler/optimizing/intrinsic_objects.cc  2
-rw-r--r--  compiler/optimizing/intrinsic_objects.h  11
-rw-r--r--  compiler/optimizing/intrinsics.cc  17
-rw-r--r--  compiler/optimizing/intrinsics.h  3
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc  252
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h  3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc  142
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h  3
-rw-r--r--  compiler/optimizing/intrinsics_utils.h  2
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc  152
-rw-r--r--  compiler/optimizing/intrinsics_x86.h  3
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc  141
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h  3
-rw-r--r--  compiler/optimizing/licm.cc  2
-rw-r--r--  compiler/optimizing/licm.h  3
-rw-r--r--  compiler/optimizing/licm_test.cc  3
-rw-r--r--  compiler/optimizing/linear_order.cc  2
-rw-r--r--  compiler/optimizing/linear_order.h  3
-rw-r--r--  compiler/optimizing/linearize_test.cc  5
-rw-r--r--  compiler/optimizing/live_interval_test.cc  3
-rw-r--r--  compiler/optimizing/live_ranges_test.cc  5
-rw-r--r--  compiler/optimizing/liveness_test.cc  5
-rw-r--r--  compiler/optimizing/load_store_analysis.cc  10
-rw-r--r--  compiler/optimizing/load_store_analysis.h  74
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc  89
-rw-r--r--  compiler/optimizing/load_store_elimination.cc  165
-rw-r--r--  compiler/optimizing/load_store_elimination.h  3
-rw-r--r--  compiler/optimizing/load_store_elimination_test.cc  385
-rw-r--r--  compiler/optimizing/locations.cc  15
-rw-r--r--  compiler/optimizing/locations.h  11
-rw-r--r--  compiler/optimizing/loop_analysis.cc  2
-rw-r--r--  compiler/optimizing/loop_analysis.h  3
-rw-r--r--  compiler/optimizing/loop_optimization.cc  69
-rw-r--r--  compiler/optimizing/loop_optimization.h  23
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc  8
-rw-r--r--  compiler/optimizing/nodes.cc  306
-rw-r--r--  compiler/optimizing/nodes.h  262
-rw-r--r--  compiler/optimizing/nodes_shared.cc  2
-rw-r--r--  compiler/optimizing/nodes_shared.h  2
-rw-r--r--  compiler/optimizing/nodes_test.cc  3
-rw-r--r--  compiler/optimizing/nodes_vector.h  2
-rw-r--r--  compiler/optimizing/nodes_vector_test.cc  3
-rw-r--r--  compiler/optimizing/nodes_x86.h  2
-rw-r--r--  compiler/optimizing/optimization.cc  19
-rw-r--r--  compiler/optimizing/optimization.h  7
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc  20
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  112
-rw-r--r--  compiler/optimizing/optimizing_compiler.h  7
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h  17
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h  52
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc  2
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h  3
-rw-r--r--  compiler/optimizing/parallel_move_test.cc  3
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc  4
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.h  3
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc  21
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h  3
-rw-r--r--  compiler/optimizing/pretty_printer.h  3
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc  5
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc  94
-rw-r--r--  compiler/optimizing/reference_type_propagation.h  8
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc  10
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc  2
-rw-r--r--  compiler/optimizing/register_allocation_resolver.h  3
-rw-r--r--  compiler/optimizing/register_allocator.cc  2
-rw-r--r--  compiler/optimizing/register_allocator.h  2
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc  2
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h  2
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc  2
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.h  2
-rw-r--r--  compiler/optimizing/register_allocator_test.cc  7
-rw-r--r--  compiler/optimizing/scheduler.cc  5
-rw-r--r--  compiler/optimizing/scheduler.h  3
-rw-r--r--  compiler/optimizing/scheduler_arm.cc  6
-rw-r--r--  compiler/optimizing/scheduler_arm.h  12
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc  2
-rw-r--r--  compiler/optimizing/scheduler_arm64.h  5
-rw-r--r--  compiler/optimizing/scheduler_test.cc  5
-rw-r--r--  compiler/optimizing/select_generator.cc  377
-rw-r--r--  compiler/optimizing/select_generator.h  42
-rw-r--r--  compiler/optimizing/select_generator_test.cc  3
-rw-r--r--  compiler/optimizing/sharpening.cc  6
-rw-r--r--  compiler/optimizing/sharpening.h  3
-rw-r--r--  compiler/optimizing/side_effects_analysis.cc  2
-rw-r--r--  compiler/optimizing/side_effects_analysis.h  3
-rw-r--r--  compiler/optimizing/side_effects_test.cc  3
-rw-r--r--  compiler/optimizing/ssa_builder.cc  3
-rw-r--r--  compiler/optimizing/ssa_builder.h  3
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc  2
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h  3
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc  3
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc  2
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.h  3
-rw-r--r--  compiler/optimizing/ssa_test.cc  5
-rw-r--r--  compiler/optimizing/stack_map_stream.cc  28
-rw-r--r--  compiler/optimizing/stack_map_stream.h  22
-rw-r--r--  compiler/optimizing/stack_map_test.cc  59
-rw-r--r--  compiler/optimizing/superblock_cloner.cc  4
-rw-r--r--  compiler/optimizing/superblock_cloner.h  3
-rw-r--r--  compiler/optimizing/superblock_cloner_test.cc  3
-rw-r--r--  compiler/optimizing/suspend_check_test.cc  5
-rw-r--r--  compiler/optimizing/write_barrier_elimination.cc  161
-rw-r--r--  compiler/optimizing/write_barrier_elimination.h  56
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc  4
-rw-r--r--  compiler/optimizing/x86_memory_gen.h  3
-rw-r--r--  compiler/trampolines/trampoline_compiler.cc  4
-rw-r--r--  compiler/trampolines/trampoline_compiler.h  13
-rw-r--r--  compiler/utils/arm/assembler_arm_shared.h  46
-rw-r--r--  compiler/utils/arm/assembler_arm_vixl.cc  6
-rw-r--r--  compiler/utils/arm/assembler_arm_vixl.h  25
-rw-r--r--  compiler/utils/arm/constants_arm.cc  2
-rw-r--r--  compiler/utils/arm/constants_arm.h  3
-rw-r--r--  compiler/utils/arm/jni_macro_assembler_arm_vixl.cc  221
-rw-r--r--  compiler/utils/arm/jni_macro_assembler_arm_vixl.h  110
-rw-r--r--  compiler/utils/arm/managed_register_arm.cc  2
-rw-r--r--  compiler/utils/arm/managed_register_arm.h  3
-rw-r--r--  compiler/utils/arm/managed_register_arm_test.cc  3
-rw-r--r--  compiler/utils/arm64/assembler_arm64.cc  7
-rw-r--r--  compiler/utils/arm64/assembler_arm64.h  3
-rw-r--r--  compiler/utils/arm64/jni_macro_assembler_arm64.cc  238
-rw-r--r--  compiler/utils/arm64/jni_macro_assembler_arm64.h  91
-rw-r--r--  compiler/utils/arm64/managed_register_arm64.cc  2
-rw-r--r--  compiler/utils/arm64/managed_register_arm64.h  3
-rw-r--r--  compiler/utils/arm64/managed_register_arm64_test.cc  3
-rw-r--r--  compiler/utils/assembler.cc  2
-rw-r--r--  compiler/utils/assembler.h  2
-rw-r--r--  compiler/utils/assembler_test.h  7
-rw-r--r--  compiler/utils/assembler_test_base.h  16
-rw-r--r--  compiler/utils/assembler_thumb_test.cc  69
-rw-r--r--  compiler/utils/assembler_thumb_test_expected.cc.inc  501
-rw-r--r--  compiler/utils/atomic_dex_ref_map-inl.h  3
-rw-r--r--  compiler/utils/atomic_dex_ref_map.h  3
-rw-r--r--  compiler/utils/atomic_dex_ref_map_test.cc  3
-rw-r--r--  compiler/utils/dedupe_set-inl.h  3
-rw-r--r--  compiler/utils/dedupe_set.h  2
-rw-r--r--  compiler/utils/dedupe_set_test.cc  3
-rw-r--r--  compiler/utils/jni_macro_assembler.cc  3
-rw-r--r--  compiler/utils/jni_macro_assembler.h  99
-rw-r--r--  compiler/utils/jni_macro_assembler_test.h  5
-rw-r--r--  compiler/utils/label.h  4
-rw-r--r--  compiler/utils/managed_register.h  8
-rw-r--r--  compiler/utils/riscv64/managed_register_riscv64.cc  52
-rw-r--r--  compiler/utils/riscv64/managed_register_riscv64.h  133
-rw-r--r--  compiler/utils/riscv64/managed_register_riscv64_test.cc  204
-rw-r--r--  compiler/utils/stack_checks.h  3
-rw-r--r--  compiler/utils/swap_space.cc  223
-rw-r--r--  compiler/utils/swap_space.h  242
-rw-r--r--  compiler/utils/swap_space_test.cc  83
-rw-r--r--  compiler/utils/x86/assembler_x86.cc  2
-rw-r--r--  compiler/utils/x86/assembler_x86.h  2
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc  3
-rw-r--r--  compiler/utils/x86/constants_x86.h  2
-rw-r--r--  compiler/utils/x86/jni_macro_assembler_x86.cc  209
-rw-r--r--  compiler/utils/x86/jni_macro_assembler_x86.h  87
-rw-r--r--  compiler/utils/x86/managed_register_x86.cc  2
-rw-r--r--  compiler/utils/x86/managed_register_x86.h  3
-rw-r--r--  compiler/utils/x86/managed_register_x86_test.cc  3
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc  2
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h  3
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc  3
-rw-r--r--  compiler/utils/x86_64/constants_x86_64.h  2
-rw-r--r--  compiler/utils/x86_64/jni_macro_assembler_x86_64.cc  212
-rw-r--r--  compiler/utils/x86_64/jni_macro_assembler_x86_64.h  106
-rw-r--r--  compiler/utils/x86_64/managed_register_x86_64.cc  2
-rw-r--r--  compiler/utils/x86_64/managed_register_x86_64.h  3
-rw-r--r--  compiler/utils/x86_64/managed_register_x86_64_test.cc  3
321 files changed, 6918 insertions, 5728 deletions
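
A pattern that recurs throughout the diff below: headers gain #include "base/macros.h" and re-open the namespace as `namespace art HIDDEN`, selected entry points gain `EXPORT`, and Android.bp adds `version_script: "libart-compiler.map"`. The macro definitions themselves are not part of this diff; as a hedged sketch, they presumably expand to ELF symbol-visibility attributes roughly like this (illustrative C++ only, not the actual base/macros.h contents):

// Sketch of what the HIDDEN/EXPORT annotations used below likely expand to.
#if defined(__GNUC__) || defined(__clang__)
#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))
#else
#define HIDDEN
#define EXPORT
#endif

// Everything declared inside a hidden namespace is non-exported by default;
// individual declarations can be re-exported with EXPORT, and the version
// script (libart-compiler.map) controls what the shared library exposes.
namespace art HIDDEN {
class InternalHelper {};          // hidden by default
EXPORT void PublicEntryPoint();   // explicitly exported
}  // namespace art
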
diff --git a/compiler/Android.bp b/compiler/Android.bp
index de98fdb993..a879bd8f06 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -33,11 +33,8 @@ art_cc_defaults {
defaults: ["art_defaults"],
host_supported: true,
srcs: [
- "compiled_method.cc",
"debug/elf_debug_writer.cc",
"dex/inline_method_analyser.cc",
- "dex/verification_results.cc",
- "driver/compiled_method_storage.cc",
"driver/compiler_options.cc",
"driver/dex_compilation_unit.cc",
"jit/jit_compiler.cc",
@@ -94,10 +91,10 @@ art_cc_defaults {
"optimizing/ssa_phi_elimination.cc",
"optimizing/stack_map_stream.cc",
"optimizing/superblock_cloner.cc",
+ "optimizing/write_barrier_elimination.cc",
"trampolines/trampoline_compiler.cc",
"utils/assembler.cc",
"utils/jni_macro_assembler.cc",
- "utils/swap_space.cc",
"compiler.cc",
],
@@ -133,6 +130,11 @@ art_cc_defaults {
"utils/arm64/managed_register_arm64.cc",
],
},
+ riscv64: {
+ srcs: [
+ "utils/riscv64/managed_register_riscv64.cc",
+ ],
+ },
x86: {
srcs: [
"jni/quick/x86/calling_convention_x86.cc",
@@ -176,6 +178,8 @@ art_cc_defaults {
],
export_include_dirs: ["."],
+ // Not using .map.txt because this is an internal API
+ version_script: "libart-compiler.map",
}
cc_defaults {
@@ -228,7 +232,7 @@ art_cc_library {
"libprofile",
"libdexfile",
],
- whole_static_libs: ["libelffile"],
+ static_libs: ["libelffile"],
runtime_libs: [
// `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load
// `libart-disassembler.so`.
@@ -245,6 +249,7 @@ art_cc_library {
apex_available: [
"com.android.art",
"com.android.art.debug",
+ "test_broken_com.android.art",
],
}
@@ -296,7 +301,7 @@ art_cc_library {
"libprofiled",
"libdexfiled",
],
- whole_static_libs: ["libelffiled"],
+ static_libs: ["libelffiled"],
runtime_libs: [
// `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load
// `libartd-disassembler.so`.
@@ -369,6 +374,7 @@ art_cc_defaults {
data: [
":art-gtest-jars-ExceptionHandle",
":art-gtest-jars-Interfaces",
+ ":art-gtest-jars-Main",
":art-gtest-jars-MyClassNatives",
],
tidy_timeout_srcs: [
@@ -381,9 +387,9 @@ art_cc_defaults {
"optimizing/ssa_test.cc",
],
srcs: [
+ "compiler_reflection_test.cc",
"debug/dwarf/dwarf_test.cc",
"debug/src_map_elem_test.cc",
- "driver/compiled_method_storage_test.cc",
"exception_test.cc",
"jni/jni_compiler_test.cc",
"linker/linker_patch_test.cc",
@@ -419,7 +425,6 @@ art_cc_defaults {
"optimizing/suspend_check_test.cc",
"utils/atomic_dex_ref_map_test.cc",
"utils/dedupe_set_test.cc",
- "utils/swap_space_test.cc",
"jni/jni_cfi_test.cc",
"optimizing/codegen_test.cc",
@@ -442,6 +447,11 @@ art_cc_defaults {
"utils/arm64/managed_register_arm64_test.cc",
],
},
+ riscv64: {
+ srcs: [
+ "utils/riscv64/managed_register_riscv64_test.cc",
+ ],
+ },
x86: {
srcs: [
"utils/x86/managed_register_x86_test.cc",
@@ -465,8 +475,8 @@ art_cc_defaults {
],
shared_libs: [
- "libbacktrace",
"libnativeloader",
+ "libunwindstack",
],
target: {
@@ -488,10 +498,12 @@ art_cc_test {
],
shared_libs: [
"libprofiled",
- "libartd-compiler",
"libartd-simulator-container",
+ "liblzma",
],
static_libs: [
+ "libartd-compiler",
+ "libelffiled",
"libvixld",
],
}
@@ -506,7 +518,8 @@ art_cc_test {
data: [":generate-boot-image"],
shared_libs: [
"libprofile",
- "libart-compiler",
+ "liblzma",
+ "libartpalette",
],
static_libs: [
// For now, link `libart-simulator-container` statically for simplicity,
@@ -515,6 +528,8 @@ art_cc_test {
// TODO(b/192070541): Consider linking `libart-simulator-container`
// dynamically.
"libart-simulator-container",
+ "libart-compiler",
+ "libelffile",
"libvixl",
],
test_config: "art_standalone_compiler_tests.xml",
@@ -548,9 +563,11 @@ art_cc_test {
},
},
shared_libs: [
- "libartd-compiler",
+ "liblzma",
],
static_libs: [
+ "libartd-compiler",
+ "libelffiled",
"libvixld",
],
}
diff --git a/compiler/art_standalone_compiler_tests.xml b/compiler/art_standalone_compiler_tests.xml
index f723971928..394ac8d4fb 100644
--- a/compiler/art_standalone_compiler_tests.xml
+++ b/compiler/art_standalone_compiler_tests.xml
@@ -14,6 +14,8 @@
limitations under the License.
-->
<configuration description="Runs art_standalone_compiler_tests.">
+ <option name="config-descriptor:metadata" key="mainline-param" value="com.google.android.art.apex" />
+
<target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
<option name="cleanup" value="true" />
<option name="push" value="art_standalone_compiler_tests->/data/local/tmp/art_standalone_compiler_tests/art_standalone_compiler_tests" />
@@ -24,6 +26,7 @@
<option name="cleanup" value="true" />
<option name="push" value="art-gtest-jars-ExceptionHandle.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-ExceptionHandle.jar" />
<option name="push" value="art-gtest-jars-Interfaces.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-Interfaces.jar" />
+ <option name="push" value="art-gtest-jars-Main.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-Main.jar" />
<option name="push" value="art-gtest-jars-MyClassNatives.jar->/data/local/tmp/art_standalone_compiler_tests/art-gtest-jars-MyClassNatives.jar" />
</target_preparer>
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 9755ef12d0..e65bee8e2e 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -23,6 +23,7 @@
#include "arch/instruction_set.h"
#include "base/enums.h"
+#include "base/macros.h"
#include "debug/dwarf/dwarf_test.h"
#include "disassembler.h"
#include "dwarf/dwarf_constants.h"
@@ -30,7 +31,7 @@
#include "gtest/gtest.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
class CFITest : public dwarf::DwarfTest {
public:
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index bbb2016566..442b96e5fa 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -28,10 +28,8 @@
#include "base/memfd.h"
#include "base/utils.h"
#include "class_linker.h"
-#include "compiled_method-inl.h"
#include "dex/descriptors_names.h"
-#include "dex/verification_results.h"
-#include "driver/compiled_method_storage.h"
+#include "driver/compiled_code_storage.h"
#include "driver/compiler_options.h"
#include "jni/java_vm_ext.h"
#include "interpreter/interpreter.h"
@@ -44,7 +42,7 @@
#include "thread-current-inl.h"
#include "utils/atomic_dex_ref_map-inl.h"
-namespace art {
+namespace art HIDDEN {
class CommonCompilerTestImpl::CodeAndMetadata {
public:
@@ -58,10 +56,10 @@ class CommonCompilerTestImpl::CodeAndMetadata {
const uint32_t vmap_table_offset = vmap_table.empty() ? 0u
: sizeof(OatQuickMethodHeader) + vmap_table.size();
OatQuickMethodHeader method_header(vmap_table_offset);
- const size_t code_alignment = GetInstructionSetAlignment(instruction_set);
+ const size_t code_alignment = GetInstructionSetCodeAlignment(instruction_set);
DCHECK_ALIGNED_PARAM(kPageSize, code_alignment);
- code_offset_ = RoundUp(vmap_table.size() + sizeof(method_header), code_alignment);
- const uint32_t capacity = RoundUp(code_offset_ + code_size, kPageSize);
+ const uint32_t code_offset = RoundUp(vmap_table.size() + sizeof(method_header), code_alignment);
+ const uint32_t capacity = RoundUp(code_offset + code_size, kPageSize);
// Create a memfd handle with sufficient capacity.
android::base::unique_fd mem_fd(art::memfd_create_compat("test code", /*flags=*/ 0));
@@ -82,12 +80,12 @@ class CommonCompilerTestImpl::CodeAndMetadata {
CHECK(rw_map_.IsValid()) << error_msg;
// Store data.
- uint8_t* code_addr = rw_map_.Begin() + code_offset_;
+ uint8_t* code_addr = rw_map_.Begin() + code_offset;
CHECK_ALIGNED_PARAM(code_addr, code_alignment);
- CHECK_LE(vmap_table_offset, code_offset_);
+ CHECK_LE(vmap_table_offset, code_offset);
memcpy(code_addr - vmap_table_offset, vmap_table.data(), vmap_table.size());
static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy");
- CHECK_LE(sizeof(method_header), code_offset_);
+ CHECK_LE(sizeof(method_header), code_offset);
memcpy(code_addr - sizeof(method_header), &method_header, sizeof(method_header));
CHECK_LE(code_size, static_cast<size_t>(rw_map_.End() - code_addr));
memcpy(code_addr, code.data(), code_size);
@@ -108,22 +106,84 @@ class CommonCompilerTestImpl::CodeAndMetadata {
/*filename=*/ "test code",
&error_msg);
CHECK(rx_map_.IsValid()) << error_msg;
+
+ DCHECK_LT(code_offset, rx_map_.Size());
+ size_t adjustment = GetInstructionSetEntryPointAdjustment(instruction_set);
+ entry_point_ = rx_map_.Begin() + code_offset + adjustment;
}
- const void* GetCodePointer() const {
+ const void* GetEntryPoint() const {
DCHECK(rx_map_.IsValid());
- DCHECK_LE(code_offset_, rx_map_.Size());
- return rx_map_.Begin() + code_offset_;
+ return entry_point_;
}
private:
MemMap rw_map_;
MemMap rx_map_;
- uint32_t code_offset_;
+ const void* entry_point_;
DISALLOW_COPY_AND_ASSIGN(CodeAndMetadata);
};
+class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCodeStorage {
+ public:
+ OneCompiledMethodStorage() {}
+ ~OneCompiledMethodStorage() {}
+
+ CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set,
+ ArrayRef<const uint8_t> code,
+ ArrayRef<const uint8_t> stack_map,
+ ArrayRef<const uint8_t> cfi ATTRIBUTE_UNUSED,
+ ArrayRef<const linker::LinkerPatch> patches,
+ bool is_intrinsic ATTRIBUTE_UNUSED) override {
+ // Supports only one method at a time.
+ CHECK_EQ(instruction_set_, InstructionSet::kNone);
+ CHECK_NE(instruction_set, InstructionSet::kNone);
+ instruction_set_ = instruction_set;
+ CHECK(code_.empty());
+ CHECK(!code.empty());
+ code_.assign(code.begin(), code.end());
+ CHECK(stack_map_.empty());
+ CHECK(!stack_map.empty());
+ stack_map_.assign(stack_map.begin(), stack_map.end());
+ CHECK(patches.empty()) << "Linker patches are unsupported for compiler gtests.";
+ return reinterpret_cast<CompiledMethod*>(this);
+ }
+
+ ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
+ /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) override {
+ LOG(FATAL) << "Unsupported.";
+ UNREACHABLE();
+ }
+
+ void SetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
+ ArrayRef<const uint8_t> code ATTRIBUTE_UNUSED,
+ const std::string& debug_name ATTRIBUTE_UNUSED) override {
+ LOG(FATAL) << "Unsupported.";
+ UNREACHABLE();
+ }
+
+ InstructionSet GetInstructionSet() const {
+ CHECK_NE(instruction_set_, InstructionSet::kNone);
+ return instruction_set_;
+ }
+
+ ArrayRef<const uint8_t> GetCode() const {
+ CHECK(!code_.empty());
+ return ArrayRef<const uint8_t>(code_);
+ }
+
+ ArrayRef<const uint8_t> GetStackMap() const {
+ CHECK(!stack_map_.empty());
+ return ArrayRef<const uint8_t>(stack_map_);
+ }
+
+ private:
+ InstructionSet instruction_set_ = InstructionSet::kNone;
+ std::vector<uint8_t> code_;
+ std::vector<uint8_t> stack_map_;
+};
+
std::unique_ptr<CompilerOptions> CommonCompilerTestImpl::CreateCompilerOptions(
InstructionSet instruction_set, const std::string& variant) {
std::unique_ptr<CompilerOptions> compiler_options = std::make_unique<CompilerOptions>();
@@ -143,24 +203,7 @@ const void* CommonCompilerTestImpl::MakeExecutable(ArrayRef<const uint8_t> code,
InstructionSet instruction_set) {
CHECK_NE(code.size(), 0u);
code_and_metadata_.emplace_back(code, vmap_table, instruction_set);
- return code_and_metadata_.back().GetCodePointer();
-}
-
-void CommonCompilerTestImpl::MakeExecutable(ArtMethod* method,
- const CompiledMethod* compiled_method) {
- CHECK(method != nullptr);
- const void* method_code = nullptr;
- // If the code size is 0 it means the method was skipped due to profile guided compilation.
- if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) {
- const void* code_ptr = MakeExecutable(compiled_method->GetQuickCode(),
- compiled_method->GetVmapTable(),
- compiled_method->GetInstructionSet());
- method_code =
- CompiledMethod::CodePointer(code_ptr, compiled_method->GetInstructionSet());
- LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code;
- }
- Runtime::Current()->GetInstrumentation()->InitializeMethodsCode(
- method, /*aot_code=*/ method_code);
+ return code_and_metadata_.back().GetEntryPoint();
}
void CommonCompilerTestImpl::SetUp() {
@@ -207,7 +250,6 @@ void CommonCompilerTestImpl::OverrideInstructionSetFeatures(InstructionSet instr
void CommonCompilerTestImpl::SetUpRuntimeOptionsImpl() {
compiler_options_.reset(new CompilerOptions);
- verification_results_.reset(new VerificationResults());
ApplyInstructionSet();
}
@@ -221,7 +263,6 @@ void CommonCompilerTestImpl::SetCompilerKind(Compiler::Kind compiler_kind) {
void CommonCompilerTestImpl::TearDown() {
code_and_metadata_.clear();
- verification_results_.reset();
compiler_options_.reset();
}
@@ -229,7 +270,7 @@ void CommonCompilerTestImpl::CompileMethod(ArtMethod* method) {
CHECK(method != nullptr);
TimingLogger timings("CommonCompilerTestImpl::CompileMethod", false, false);
TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
- CompiledMethodStorage storage(/*swap_fd=*/ -1);
+ OneCompiledMethodStorage storage;
CompiledMethod* compiled_method = nullptr;
{
DCHECK(!Runtime::Current()->IsStarted());
@@ -241,7 +282,6 @@ void CommonCompilerTestImpl::CompileMethod(ArtMethod* method) {
Handle<mirror::DexCache> dex_cache =
hs.NewHandle(GetClassLinker()->FindDexCache(self, dex_file));
Handle<mirror::ClassLoader> class_loader = hs.NewHandle(method->GetClassLoader());
- compiler_options_->verification_results_ = verification_results_.get();
if (method->IsNative()) {
compiled_method = compiler->JniCompile(method->GetAccessFlags(),
method->GetDexMethodIndex(),
@@ -257,48 +297,17 @@ void CommonCompilerTestImpl::CompileMethod(ArtMethod* method) {
dex_file,
dex_cache);
}
- compiler_options_->verification_results_ = nullptr;
+ CHECK(compiled_method != nullptr) << "Failed to compile " << method->PrettyMethod();
+ CHECK_EQ(reinterpret_cast<OneCompiledMethodStorage*>(compiled_method), &storage);
}
- CHECK(method != nullptr);
{
TimingLogger::ScopedTiming t2("MakeExecutable", &timings);
- MakeExecutable(method, compiled_method);
+ const void* method_code = MakeExecutable(storage.GetCode(),
+ storage.GetStackMap(),
+ storage.GetInstructionSet());
+ LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code;
+ GetRuntime()->GetInstrumentation()->InitializeMethodsCode(method, /*aot_code=*/ method_code);
}
- CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&storage, compiled_method);
-}
-
-void CommonCompilerTestImpl::CompileDirectMethod(Handle<mirror::ClassLoader> class_loader,
- const char* class_name,
- const char* method_name,
- const char* signature) {
- std::string class_descriptor(DotToDescriptor(class_name));
- Thread* self = Thread::Current();
- ClassLinker* class_linker = GetClassLinker();
- ObjPtr<mirror::Class> klass =
- class_linker->FindClass(self, class_descriptor.c_str(), class_loader);
- CHECK(klass != nullptr) << "Class not found " << class_name;
- auto pointer_size = class_linker->GetImagePointerSize();
- ArtMethod* method = klass->FindClassMethod(method_name, signature, pointer_size);
- CHECK(method != nullptr && method->IsDirect()) << "Direct method not found: "
- << class_name << "." << method_name << signature;
- CompileMethod(method);
-}
-
-void CommonCompilerTestImpl::CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader,
- const char* class_name,
- const char* method_name,
- const char* signature) {
- std::string class_descriptor(DotToDescriptor(class_name));
- Thread* self = Thread::Current();
- ClassLinker* class_linker = GetClassLinker();
- ObjPtr<mirror::Class> klass =
- class_linker->FindClass(self, class_descriptor.c_str(), class_loader);
- CHECK(klass != nullptr) << "Class not found " << class_name;
- auto pointer_size = class_linker->GetImagePointerSize();
- ArtMethod* method = klass->FindClassMethod(method_name, signature, pointer_size);
- CHECK(method != nullptr && !method->IsDirect()) << "Virtual method not found: "
- << class_name << "." << method_name << signature;
- CompileMethod(method);
}
void CommonCompilerTestImpl::ClearBootImageOption() {
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 89cc1fa28f..f3cd13296c 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -24,25 +24,25 @@
#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
+#include "base/macros.h"
#include "common_runtime_test.h"
#include "compiler.h"
#include "oat_file.h"
-namespace art {
+namespace art HIDDEN {
namespace mirror {
class ClassLoader;
} // namespace mirror
-class CompiledMethod;
class CompilerOptions;
class CumulativeLogger;
class DexFile;
class TimingLogger;
-class VerificationResults;
template<class T> class Handle;
-class CommonCompilerTestImpl {
+// Export all symbols in `CommonCompilerTestImpl` for dex2oat tests.
+class EXPORT CommonCompilerTestImpl {
public:
static std::unique_ptr<CompilerOptions> CreateCompilerOptions(InstructionSet instruction_set,
const std::string& variant);
@@ -55,9 +55,6 @@ class CommonCompilerTestImpl {
ArrayRef<const uint8_t> vmap_table,
InstructionSet instruction_set);
- void MakeExecutable(ArtMethod* method, const CompiledMethod* compiled_method)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
protected:
void SetUp();
@@ -74,14 +71,6 @@ class CommonCompilerTestImpl {
void CompileMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
- void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
- const char* method_name, const char* signature)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
- void CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
- const char* method_name, const char* signature)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
void ApplyInstructionSet();
void OverrideInstructionSetFeatures(InstructionSet instruction_set, const std::string& variant);
@@ -96,7 +85,6 @@ class CommonCompilerTestImpl {
= InstructionSetFeatures::FromCppDefines();
std::unique_ptr<CompilerOptions> compiler_options_;
- std::unique_ptr<VerificationResults> verification_results_;
protected:
virtual ClassLinker* GetClassLinker() = 0;
@@ -104,6 +92,8 @@ class CommonCompilerTestImpl {
private:
class CodeAndMetadata;
+ class OneCompiledMethodStorage;
+
std::vector<CodeAndMetadata> code_and_metadata_;
};
diff --git a/compiler/compiled_method-inl.h b/compiler/compiled_method-inl.h
deleted file mode 100644
index e60b30fed2..0000000000
--- a/compiler/compiled_method-inl.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_COMPILED_METHOD_INL_H_
-#define ART_COMPILER_COMPILED_METHOD_INL_H_
-
-#include "compiled_method.h"
-
-#include "base/array_ref.h"
-#include "base/length_prefixed_array.h"
-#include "linker/linker_patch.h"
-
-namespace art {
-
-inline ArrayRef<const uint8_t> CompiledCode::GetQuickCode() const {
- return GetArray(quick_code_);
-}
-
-template <typename T>
-inline ArrayRef<const T> CompiledCode::GetArray(const LengthPrefixedArray<T>* array) {
- if (array == nullptr) {
- return ArrayRef<const T>();
- }
- DCHECK_NE(array->size(), 0u);
- return ArrayRef<const T>(&array->At(0), array->size());
-}
-
-inline ArrayRef<const uint8_t> CompiledMethod::GetVmapTable() const {
- return GetArray(vmap_table_);
-}
-
-inline ArrayRef<const uint8_t> CompiledMethod::GetCFIInfo() const {
- return GetArray(cfi_info_);
-}
-
-inline ArrayRef<const linker::LinkerPatch> CompiledMethod::GetPatches() const {
- return GetArray(patches_);
-}
-
-} // namespace art
-
-#endif // ART_COMPILER_COMPILED_METHOD_INL_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
deleted file mode 100644
index 03b87ef09e..0000000000
--- a/compiler/compiled_method.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiled_method.h"
-
-#include "driver/compiled_method_storage.h"
-#include "utils/swap_space.h"
-
-namespace art {
-
-CompiledCode::CompiledCode(CompiledMethodStorage* storage,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code)
- : storage_(storage),
- quick_code_(storage->DeduplicateCode(quick_code)),
- packed_fields_(InstructionSetField::Encode(instruction_set)) {
-}
-
-CompiledCode::~CompiledCode() {
- GetStorage()->ReleaseCode(quick_code_);
-}
-
-bool CompiledCode::operator==(const CompiledCode& rhs) const {
- if (quick_code_ != nullptr) {
- if (rhs.quick_code_ == nullptr) {
- return false;
- } else if (quick_code_->size() != rhs.quick_code_->size()) {
- return false;
- } else {
- return std::equal(quick_code_->begin(), quick_code_->end(), rhs.quick_code_->begin());
- }
- }
- return (rhs.quick_code_ == nullptr);
-}
-
-size_t CompiledCode::AlignCode(size_t offset) const {
- return AlignCode(offset, GetInstructionSet());
-}
-
-size_t CompiledCode::AlignCode(size_t offset, InstructionSet instruction_set) {
- return RoundUp(offset, GetInstructionSetAlignment(instruction_set));
-}
-
-size_t CompiledCode::CodeDelta() const {
- return CodeDelta(GetInstructionSet());
-}
-
-size_t CompiledCode::CodeDelta(InstructionSet instruction_set) {
- switch (instruction_set) {
- case InstructionSet::kArm:
- case InstructionSet::kArm64:
- case InstructionSet::kX86:
- case InstructionSet::kX86_64:
- return 0;
- case InstructionSet::kThumb2: {
- // +1 to set the low-order bit so a BLX will switch to Thumb mode
- return 1;
- }
- default:
- LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
- UNREACHABLE();
- }
-}
-
-const void* CompiledCode::CodePointer(const void* code_pointer, InstructionSet instruction_set) {
- switch (instruction_set) {
- case InstructionSet::kArm:
- case InstructionSet::kArm64:
- case InstructionSet::kX86:
- case InstructionSet::kX86_64:
- return code_pointer;
- case InstructionSet::kThumb2: {
- uintptr_t address = reinterpret_cast<uintptr_t>(code_pointer);
- // Set the low-order bit so a BLX will switch to Thumb mode
- address |= 0x1;
- return reinterpret_cast<const void*>(address);
- }
- default:
- LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
- UNREACHABLE();
- }
-}
-
-CompiledMethod::CompiledMethod(CompiledMethodStorage* storage,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const ArrayRef<const uint8_t>& vmap_table,
- const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const linker::LinkerPatch>& patches)
- : CompiledCode(storage, instruction_set, quick_code),
- vmap_table_(storage->DeduplicateVMapTable(vmap_table)),
- cfi_info_(storage->DeduplicateCFIInfo(cfi_info)),
- patches_(storage->DeduplicateLinkerPatches(patches)) {
-}
-
-CompiledMethod* CompiledMethod::SwapAllocCompiledMethod(
- CompiledMethodStorage* storage,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const ArrayRef<const uint8_t>& vmap_table,
- const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const linker::LinkerPatch>& patches) {
- SwapAllocator<CompiledMethod> alloc(storage->GetSwapSpaceAllocator());
- CompiledMethod* ret = alloc.allocate(1);
- alloc.construct(ret,
- storage,
- instruction_set,
- quick_code,
- vmap_table,
- cfi_info, patches);
- return ret;
-}
-
-void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompiledMethodStorage* storage,
- CompiledMethod* m) {
- SwapAllocator<CompiledMethod> alloc(storage->GetSwapSpaceAllocator());
- alloc.destroy(m);
- alloc.deallocate(m, 1);
-}
-
-CompiledMethod::~CompiledMethod() {
- CompiledMethodStorage* storage = GetStorage();
- storage->ReleaseLinkerPatches(patches_);
- storage->ReleaseCFIInfo(cfi_info_);
- storage->ReleaseVMapTable(vmap_table_);
-}
-
-} // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
deleted file mode 100644
index e92777ff12..0000000000
--- a/compiler/compiled_method.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_COMPILED_METHOD_H_
-#define ART_COMPILER_COMPILED_METHOD_H_
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arch/instruction_set.h"
-#include "base/bit_field.h"
-#include "base/bit_utils.h"
-
-namespace art {
-
-template <typename T> class ArrayRef;
-class CompiledMethodStorage;
-template<typename T> class LengthPrefixedArray;
-
-namespace linker {
-class LinkerPatch;
-} // namespace linker
-
-class CompiledCode {
- public:
- // For Quick to supply an code blob
- CompiledCode(CompiledMethodStorage* storage,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code);
-
- virtual ~CompiledCode();
-
- InstructionSet GetInstructionSet() const {
- return GetPackedField<InstructionSetField>();
- }
-
- ArrayRef<const uint8_t> GetQuickCode() const;
-
- bool operator==(const CompiledCode& rhs) const;
-
- // To align an offset from a page-aligned value to make it suitable
- // for code storage. For example on ARM, to ensure that PC relative
- // valu computations work out as expected.
- size_t AlignCode(size_t offset) const;
- static size_t AlignCode(size_t offset, InstructionSet instruction_set);
-
- // returns the difference between the code address and a usable PC.
- // mainly to cope with kThumb2 where the lower bit must be set.
- size_t CodeDelta() const;
- static size_t CodeDelta(InstructionSet instruction_set);
-
- // Returns a pointer suitable for invoking the code at the argument
- // code_pointer address. Mainly to cope with kThumb2 where the
- // lower bit must be set to indicate Thumb mode.
- static const void* CodePointer(const void* code_pointer, InstructionSet instruction_set);
-
- protected:
- static constexpr size_t kInstructionSetFieldSize =
- MinimumBitsToStore(static_cast<size_t>(InstructionSet::kLast));
- static constexpr size_t kNumberOfCompiledCodePackedBits = kInstructionSetFieldSize;
- static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte;
-
- template <typename T>
- static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array);
-
- CompiledMethodStorage* GetStorage() {
- return storage_;
- }
-
- template <typename BitFieldType>
- typename BitFieldType::value_type GetPackedField() const {
- return BitFieldType::Decode(packed_fields_);
- }
-
- template <typename BitFieldType>
- void SetPackedField(typename BitFieldType::value_type value) {
- DCHECK(IsUint<BitFieldType::size>(static_cast<uintptr_t>(value)));
- packed_fields_ = BitFieldType::Update(value, packed_fields_);
- }
-
- private:
- using InstructionSetField = BitField<InstructionSet, 0u, kInstructionSetFieldSize>;
-
- CompiledMethodStorage* const storage_;
-
- // Used to store the compiled code.
- const LengthPrefixedArray<uint8_t>* const quick_code_;
-
- uint32_t packed_fields_;
-};
-
-class CompiledMethod final : public CompiledCode {
- public:
- // Constructs a CompiledMethod.
- // Note: Consider using the static allocation methods below that will allocate the CompiledMethod
- // in the swap space.
- CompiledMethod(CompiledMethodStorage* storage,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const ArrayRef<const uint8_t>& vmap_table,
- const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const linker::LinkerPatch>& patches);
-
- virtual ~CompiledMethod();
-
- static CompiledMethod* SwapAllocCompiledMethod(
- CompiledMethodStorage* storage,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const ArrayRef<const uint8_t>& vmap_table,
- const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const linker::LinkerPatch>& patches);
-
- static void ReleaseSwapAllocatedCompiledMethod(CompiledMethodStorage* storage, CompiledMethod* m);
-
- bool IsIntrinsic() const {
- return GetPackedField<IsIntrinsicField>();
- }
-
- // Marks the compiled method as being generated using an intrinsic codegen.
- // Such methods have no relationships to their code items.
- // This affects debug information generated at link time.
- void MarkAsIntrinsic() {
- DCHECK(!IsIntrinsic());
- SetPackedField<IsIntrinsicField>(/* value= */ true);
- }
-
- ArrayRef<const uint8_t> GetVmapTable() const;
-
- ArrayRef<const uint8_t> GetCFIInfo() const;
-
- ArrayRef<const linker::LinkerPatch> GetPatches() const;
-
- private:
- static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits;
- static constexpr size_t kIsIntrinsicSize = 1u;
- static constexpr size_t kNumberOfCompiledMethodPackedBits = kIsIntrinsicLsb + kIsIntrinsicSize;
- static_assert(kNumberOfCompiledMethodPackedBits <= CompiledCode::kMaxNumberOfPackedBits,
- "Too many packed fields.");
-
- using IsIntrinsicField = BitField<bool, kIsIntrinsicLsb, kIsIntrinsicSize>;
-
- // For quick code, holds code infos which contain stack maps, inline information, and etc.
- const LengthPrefixedArray<uint8_t>* const vmap_table_;
- // For quick code, a FDE entry for the debug_frame section.
- const LengthPrefixedArray<uint8_t>* const cfi_info_;
- // For quick code, linker patches needed by the method.
- const LengthPrefixedArray<linker::LinkerPatch>* const patches_;
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_COMPILED_METHOD_H_
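
The deleted CompiledCode::CodeDelta() and CodePointer() helpers above encode the Thumb-2 convention of setting the low bit of the entry point so a BLX/BX switches the core into Thumb state; callers in this change use the runtime's GetInstructionSetEntryPointAdjustment() instead (see the new CodeAndMetadata code in common_compiler_test.cc earlier in this diff). A minimal C++ sketch of the equivalent logic, mirroring the deleted code rather than the real runtime helper and using names local to this example:

// Sketch: per-ISA entry-point adjustment as implied by the deleted code.
#include <cstddef>
#include <cstdint>

enum class InstructionSet { kNone, kArm, kThumb2, kArm64, kRiscv64, kX86, kX86_64 };

// Thumb-2 entry points need the low bit set; other ISAs need no adjustment.
inline size_t EntryPointAdjustment(InstructionSet isa) {
  return (isa == InstructionSet::kThumb2) ? 1u : 0u;
}

// Converts the start of a code region into an invokable entry point.
inline const void* ToEntryPoint(const uint8_t* code_start, InstructionSet isa) {
  return code_start + EntryPointAdjustment(isa);
}
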
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index 98d73396bc..e2587c1253 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -25,10 +25,10 @@
#include "oat.h"
#include "optimizing/optimizing_compiler.h"
-namespace art {
+namespace art HIDDEN {
Compiler* Compiler::Create(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage,
+ CompiledCodeStorage* storage,
Compiler::Kind kind) {
// Check that oat version when runtime was compiled matches the oat version of the compiler.
constexpr std::array<uint8_t, 4> compiler_oat_version = OatHeader::kOatVersion;
diff --git a/compiler/compiler.h b/compiler/compiler.h
index afa0dbab60..ce785bb769 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -17,12 +17,13 @@
#ifndef ART_COMPILER_COMPILER_H_
#define ART_COMPILER_COMPILER_H_
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/os.h"
#include "compilation_kind.h"
#include "dex/invoke_type.h"
-namespace art {
+namespace art HIDDEN {
namespace dex {
struct CodeItem;
@@ -38,8 +39,8 @@ class DexCache;
} // namespace mirror
class ArtMethod;
+class CompiledCodeStorage;
class CompiledMethod;
-class CompiledMethodStorage;
class CompilerOptions;
class DexFile;
template<class T> class Handle;
@@ -52,9 +53,9 @@ class Compiler {
kOptimizing
};
- static Compiler* Create(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage,
- Kind kind);
+ EXPORT static Compiler* Create(const CompilerOptions& compiler_options,
+ CompiledCodeStorage* storage,
+ Kind kind);
virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const = 0;
@@ -99,7 +100,7 @@ class Compiler {
protected:
Compiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage,
+ CompiledCodeStorage* storage,
uint64_t warning) :
compiler_options_(compiler_options),
storage_(storage),
@@ -110,13 +111,13 @@ class Compiler {
return compiler_options_;
}
- CompiledMethodStorage* GetCompiledMethodStorage() const {
+ CompiledCodeStorage* GetCompiledCodeStorage() const {
return storage_;
}
private:
const CompilerOptions& compiler_options_;
- CompiledMethodStorage* const storage_;
+ CompiledCodeStorage* const storage_;
const uint64_t maximum_compilation_time_before_warning_;
DISALLOW_COPY_AND_ASSIGN(Compiler);
diff --git a/compiler/compiler_reflection_test.cc b/compiler/compiler_reflection_test.cc
new file mode 100644
index 0000000000..f3c07db136
--- /dev/null
+++ b/compiler/compiler_reflection_test.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reflection.h"
+
+#include "base/macros.h"
+#include "class_linker.h"
+#include "common_compiler_test.h"
+#include "handle_scope-inl.h"
+#include "jni/jni_internal.h"
+#include "mirror/class.h"
+#include "mirror/class_loader.h"
+
+namespace art HIDDEN {
+
+class CompilerReflectionTest : public CommonCompilerTest {};
+
+TEST_F(CompilerReflectionTest, StaticMainMethod) {
+ ScopedObjectAccess soa(Thread::Current());
+ jobject jclass_loader = LoadDex("Main");
+ StackHandleScope<1> hs(soa.Self());
+ Handle<mirror::ClassLoader> class_loader(
+ hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
+
+ ObjPtr<mirror::Class> klass = class_linker_->FindClass(soa.Self(), "LMain;", class_loader);
+ ASSERT_TRUE(klass != nullptr);
+
+ ArtMethod* method = klass->FindClassMethod("main",
+ "([Ljava/lang/String;)V",
+ kRuntimePointerSize);
+ ASSERT_TRUE(method != nullptr);
+ ASSERT_TRUE(method->IsStatic());
+
+ CompileMethod(method);
+
+ // Start runtime.
+ bool started = runtime_->Start();
+ CHECK(started);
+ soa.Self()->TransitionFromSuspendedToRunnable();
+
+ jvalue args[1];
+ args[0].l = nullptr;
+ InvokeWithJValues(soa, nullptr, jni::EncodeArtMethod(method), args);
+}
+
+} // namespace art
diff --git a/compiler/debug/debug_info.h b/compiler/debug/debug_info.h
index 04c6991ea3..4027f114ec 100644
--- a/compiler/debug/debug_info.h
+++ b/compiler/debug/debug_info.h
@@ -20,9 +20,10 @@
#include <map>
#include "base/array_ref.h"
+#include "base/macros.h"
#include "method_debug_info.h"
-namespace art {
+namespace art HIDDEN {
class DexFile;
namespace debug {
diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc
index 8897e45584..14c92b2380 100644
--- a/compiler/debug/dwarf/dwarf_test.cc
+++ b/compiler/debug/dwarf/dwarf_test.cc
@@ -23,7 +23,7 @@
#include "dwarf/headers.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
namespace dwarf {
// Run the tests only on host since we need objdump.
diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h
index bad986a330..1a0a798d74 100644
--- a/compiler/debug/dwarf/dwarf_test.h
+++ b/compiler/debug/dwarf/dwarf_test.h
@@ -26,6 +26,7 @@
#include <set>
#include <string>
+#include "base/macros.h"
#include "base/os.h"
#include "base/unix_file/fd_file.h"
#include "common_compiler_test.h"
@@ -33,7 +34,7 @@
#include "gtest/gtest.h"
#include "stream/file_output_stream.h"
-namespace art {
+namespace art HIDDEN {
namespace dwarf {
#define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__)
diff --git a/compiler/debug/elf_compilation_unit.h b/compiler/debug/elf_compilation_unit.h
index b1d89ebeb2..1d7523c350 100644
--- a/compiler/debug/elf_compilation_unit.h
+++ b/compiler/debug/elf_compilation_unit.h
@@ -19,9 +19,10 @@
#include <vector>
+#include "base/macros.h"
#include "debug/method_debug_info.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
struct ElfCompilationUnit {
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
index 094e8871b9..6b72262e26 100644
--- a/compiler/debug/elf_debug_frame_writer.h
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -20,13 +20,14 @@
#include <vector>
#include "arch/instruction_set.h"
+#include "base/macros.h"
#include "debug/method_debug_info.h"
#include "dwarf/debug_frame_opcode_writer.h"
#include "dwarf/dwarf_constants.h"
#include "dwarf/headers.h"
#include "elf/elf_builder.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
static constexpr bool kWriteDebugFrameHdr = false;
@@ -88,6 +89,10 @@ static void WriteCIE(InstructionSet isa, /*inout*/ std::vector<uint8_t>* buffer)
WriteCIE(is64bit, return_reg, opcodes, buffer);
return;
}
+ case InstructionSet::kRiscv64: {
+ UNIMPLEMENTED(FATAL);
+ return;
+ }
case InstructionSet::kX86: {
// FIXME: Add fp registers once libunwind adds support for them. Bug: 20491296
constexpr bool generate_opcodes_for_x86_fp = false;
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 986c7e820f..9915a24a5f 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -22,6 +22,7 @@
#include <vector>
#include "art_field-inl.h"
+#include "base/macros.h"
#include "debug/elf_compilation_unit.h"
#include "debug/elf_debug_loc_writer.h"
#include "debug/method_debug_info.h"
@@ -32,14 +33,14 @@
#include "dwarf/debug_info_entry_writer.h"
#include "elf/elf_builder.h"
#include "heap_poisoning.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
#include "mirror/array.h"
#include "mirror/class-inl.h"
#include "mirror/class.h"
#include "oat_file.h"
#include "obj_ptr-inl.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
@@ -478,7 +479,9 @@ class ElfCompilationUnitWriter {
if (methods_ptr == nullptr) {
// Some types might have no methods. Allocate empty array instead.
LinearAlloc* allocator = Runtime::Current()->GetLinearAlloc();
- void* storage = allocator->Alloc(Thread::Current(), sizeof(LengthPrefixedArray<ArtMethod>));
+ void* storage = allocator->Alloc(Thread::Current(),
+ sizeof(LengthPrefixedArray<ArtMethod>),
+ LinearAllocKind::kNoGCRoots);
methods_ptr = new (storage) LengthPrefixedArray<ArtMethod>(0);
type->SetMethodsPtr(methods_ptr, 0, 0);
DCHECK(type->GetMethodsPtr() != nullptr);
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index 8d62747c66..4896bc1e9b 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -20,6 +20,7 @@
#include <unordered_set>
#include <vector>
+#include "base/macros.h"
#include "debug/elf_compilation_unit.h"
#include "debug/src_map_elem.h"
#include "dex/dex_file-inl.h"
@@ -29,7 +30,7 @@
#include "oat_file.h"
#include "stack_map.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
using PositionInfos = std::vector<DexFile::PositionInfo>;
@@ -73,6 +74,7 @@ class ElfDebugLineWriter {
code_factor_bits_ = 2; // 32-bit instructions
break;
case InstructionSet::kNone:
+ case InstructionSet::kRiscv64:
case InstructionSet::kX86:
case InstructionSet::kX86_64:
break;
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index 37ab948119..8cf476ed2d 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -21,13 +21,13 @@
#include <map>
#include "arch/instruction_set.h"
-#include "compiled_method.h"
+#include "base/macros.h"
#include "debug/method_debug_info.h"
#include "dwarf/debug_info_entry_writer.h"
#include "dwarf/register.h"
#include "stack_map.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
using Reg = dwarf::Reg;
@@ -38,6 +38,8 @@ static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
return Reg::ArmCore(machine_reg);
case InstructionSet::kArm64:
return Reg::Arm64Core(machine_reg);
+ case InstructionSet::kRiscv64:
+ return Reg::Riscv64Core(machine_reg);
case InstructionSet::kX86:
return Reg::X86Core(machine_reg);
case InstructionSet::kX86_64:
@@ -55,6 +57,8 @@ static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) {
return Reg::ArmFp(machine_reg);
case InstructionSet::kArm64:
return Reg::Arm64Fp(machine_reg);
+ case InstructionSet::kRiscv64:
+ return Reg::Riscv64Fp(machine_reg);
case InstructionSet::kX86:
return Reg::X86Fp(machine_reg);
case InstructionSet::kX86_64:
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 765a81d4f8..8f64d73aa7 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -38,7 +38,7 @@
#include "oat.h"
#include "stream/vector_output_stream.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
using ElfRuntimeTypes = std::conditional<sizeof(void*) == 4, ElfTypes32, ElfTypes64>::type;
@@ -208,7 +208,8 @@ std::vector<uint8_t> MakeElfFileForJIT(
using Reader = ElfDebugReader<ElfTypes>;
Reader reader(buffer);
reader.VisitFunctionSymbols([&](Elf_Sym sym, const char*) {
- DCHECK_EQ(sym.st_value, method_info.code_address + CompiledMethod::CodeDelta(isa));
+ DCHECK_EQ(sym.st_value,
+ method_info.code_address + GetInstructionSetEntryPointAdjustment(isa));
DCHECK_EQ(sym.st_size, method_info.code_size);
num_syms++;
});
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 1ce3c6f6f8..72b028cb97 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -27,7 +27,7 @@
#include "dwarf/dwarf_constants.h"
#include "elf/elf_builder.h"
-namespace art {
+namespace art HIDDEN {
class OatHeader;
struct JITCodeEntry;
namespace mirror {
@@ -37,11 +37,11 @@ namespace debug {
struct MethodDebugInfo;
template <typename ElfTypes>
-void WriteDebugInfo(
+EXPORT void WriteDebugInfo(
ElfBuilder<ElfTypes>* builder,
const DebugInfo& debug_info);
-std::vector<uint8_t> MakeMiniDebugInfo(
+EXPORT std::vector<uint8_t> MakeMiniDebugInfo(
InstructionSet isa,
const InstructionSetFeatures* features,
uint64_t text_section_address,
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 410f704582..fcd6696fa8 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -21,6 +21,7 @@
#include <unordered_set>
#include <unordered_map>
+#include "base/macros.h"
#include "base/utils.h"
#include "debug/debug_info.h"
#include "debug/method_debug_info.h"
@@ -29,7 +30,7 @@
#include "dex/dex_file-inl.h"
#include "elf/elf_builder.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
// The ARM specification defines three special mapping symbols
@@ -153,7 +154,7 @@ static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
uint64_t address = info.code_address;
address += info.is_code_address_text_relative ? text->GetAddress() : 0;
// Add in code delta, e.g., thumb bit 0 for Thumb2 code.
- address += CompiledMethod::CodeDelta(info.isa);
+ address += GetInstructionSetEntryPointAdjustment(info.isa);
symtab->Add(name_offset, text, address, info.code_size, STB_GLOBAL, STT_FUNC);
}
// Add symbols for dex files.
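The `GetInstructionSetEntryPointAdjustment()` calls that replace `CompiledMethod::CodeDelta()` in the two hunks above supply the same per-ISA delta. A small self-contained sketch, assuming the adjustment is the Thumb bit for Thumb2 and zero for other instruction sets (the enum and function names below are illustrative, not the real API):

    // Sketch only: models the per-ISA entry-point delta added to symbol addresses.
    #include <cstdint>

    enum class Isa { kArm, kThumb2, kArm64, kRiscv64, kX86, kX86_64, kNone };

    constexpr uint64_t EntryPointAdjustment(Isa isa) {
      // Assumption: only Thumb2 entry points carry a delta (bit 0, the Thumb bit).
      return isa == Isa::kThumb2 ? 1u : 0u;
    }

    constexpr uint64_t SymbolAddress(uint64_t code_address, Isa isa) {
      // Mirrors the symtab writer above: raw code address plus the ISA-specific
      // delta gives the address a caller would actually jump to.
      return code_address + EntryPointAdjustment(isa);
    }

    static_assert(SymbolAddress(0x1000, Isa::kThumb2) == 0x1001, "Thumb bit set");
    static_assert(SymbolAddress(0x1000, Isa::kArm64) == 0x1000, "no delta");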
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
index 152db6eaf0..b83c6e2052 100644
--- a/compiler/debug/method_debug_info.h
+++ b/compiler/debug/method_debug_info.h
@@ -21,9 +21,10 @@
#include "arch/instruction_set.h"
#include "base/array_ref.h"
+#include "base/macros.h"
#include "dex/dex_file.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
struct MethodDebugInfo {
diff --git a/compiler/debug/src_map_elem.h b/compiler/debug/src_map_elem.h
index 5286b8c4dc..646a1f0fc7 100644
--- a/compiler/debug/src_map_elem.h
+++ b/compiler/debug/src_map_elem.h
@@ -19,7 +19,9 @@
#include <stdint.h>
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class SrcMapElem {
public:
diff --git a/compiler/debug/src_map_elem_test.cc b/compiler/debug/src_map_elem_test.cc
index ceaa53fa99..bdbafd5b40 100644
--- a/compiler/debug/src_map_elem_test.cc
+++ b/compiler/debug/src_map_elem_test.cc
@@ -20,7 +20,7 @@
#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
namespace debug {
TEST(SrcMapElem, Operators) {
diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc
index 32019657d3..381db3d21d 100644
--- a/compiler/dex/inline_method_analyser.cc
+++ b/compiler/dex/inline_method_analyser.cc
@@ -33,7 +33,7 @@
* only to allow the debugger to check whether a method has been inlined.
*/
-namespace art {
+namespace art HIDDEN {
namespace { // anonymous namespace
diff --git a/compiler/dex/inline_method_analyser.h b/compiler/dex/inline_method_analyser.h
index e1d652a642..99d07c6152 100644
--- a/compiler/dex/inline_method_analyser.h
+++ b/compiler/dex/inline_method_analyser.h
@@ -28,7 +28,7 @@
* only to allow the debugger to check whether a method has been inlined.
*/
-namespace art {
+namespace art HIDDEN {
class CodeItemDataAccessor;
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
deleted file mode 100644
index b819d0effa..0000000000
--- a/compiler/dex/verification_results.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "verification_results.h"
-
-#include <android-base/logging.h>
-
-#include "base/mutex-inl.h"
-#include "base/stl_util.h"
-#include "runtime.h"
-#include "thread-current-inl.h"
-#include "thread.h"
-
-namespace art {
-
-VerificationResults::VerificationResults()
- : uncompilable_methods_lock_("compiler uncompilable methods lock"),
- rejected_classes_lock_("compiler rejected classes lock") {}
-
-// Non-inline version of the destructor, as it does some implicit work not worth
-// inlining.
-VerificationResults::~VerificationResults() {}
-
-void VerificationResults::AddRejectedClass(ClassReference ref) {
- {
- WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
- rejected_classes_.insert(ref);
- }
- DCHECK(IsClassRejected(ref));
-}
-
-bool VerificationResults::IsClassRejected(ClassReference ref) const {
- ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_);
- return rejected_classes_.find(ref) != rejected_classes_.end();
-}
-
-void VerificationResults::AddUncompilableMethod(MethodReference ref) {
- {
- WriterMutexLock mu(Thread::Current(), uncompilable_methods_lock_);
- uncompilable_methods_.insert(ref);
- }
- DCHECK(IsUncompilableMethod(ref));
-}
-
-bool VerificationResults::IsUncompilableMethod(MethodReference ref) const {
- ReaderMutexLock mu(Thread::Current(), uncompilable_methods_lock_);
- return uncompilable_methods_.find(ref) != uncompilable_methods_.end();
-}
-
-
-} // namespace art
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
deleted file mode 100644
index b294ed3020..0000000000
--- a/compiler/dex/verification_results.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
-#define ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
-
-#include <set>
-
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "dex/class_reference.h"
-#include "dex/method_reference.h"
-
-namespace art {
-
-namespace verifier {
-class VerifierDepsTest;
-} // namespace verifier
-
-// Used by CompilerCallbacks to track verification information from the Runtime.
-class VerificationResults {
- public:
- VerificationResults();
- ~VerificationResults();
-
- void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_);
- bool IsClassRejected(ClassReference ref) const REQUIRES(!rejected_classes_lock_);
-
- void AddUncompilableMethod(MethodReference ref) REQUIRES(!uncompilable_methods_lock_);
- bool IsUncompilableMethod(MethodReference ref) const REQUIRES(!uncompilable_methods_lock_);
-
- private:
- // TODO: External locking during CompilerDriver::PreCompile(), no locking during compilation.
- mutable ReaderWriterMutex uncompilable_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- std::set<MethodReference> uncompilable_methods_ GUARDED_BY(uncompilable_methods_lock_);
-
- // Rejected classes.
- // TODO: External locking during CompilerDriver::PreCompile(), no locking during compilation.
- mutable ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_);
-
- friend class verifier::VerifierDepsTest;
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
diff --git a/compiler/driver/compiled_code_storage.h b/compiler/driver/compiled_code_storage.h
new file mode 100644
index 0000000000..cef7398ec1
--- /dev/null
+++ b/compiler/driver/compiled_code_storage.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DRIVER_COMPILED_CODE_STORAGE_H_
+#define ART_COMPILER_DRIVER_COMPILED_CODE_STORAGE_H_
+
+#include <string>
+
+#include "base/array_ref.h"
+#include "base/macros.h"
+
+namespace art HIDDEN {
+
+namespace linker {
+class LinkerPatch;
+} // namespace linker
+
+class CompiledMethod;
+enum class InstructionSet;
+
+// Interface for storing AOT-compiled artifacts.
+// These artifacts include compiled method code and related stack maps and
+// linker patches as well as the compiled thunk code required for some kinds
+// of linker patches.
+//
+// This interface is used for passing AOT-compiled code and metadata produced
+// by the `libart-compiler` to `dex2oat`. The `CompiledMethod` created by
+// `dex2oat` is completely opaque to the `libart-compiler`.
+class CompiledCodeStorage {
+ public:
+ virtual CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set,
+ ArrayRef<const uint8_t> code,
+ ArrayRef<const uint8_t> stack_map,
+ ArrayRef<const uint8_t> cfi,
+ ArrayRef<const linker::LinkerPatch> patches,
+ bool is_intrinsic) = 0;
+
+ // TODO: Rewrite the interface for passing thunks to the `dex2oat` to reduce
+ // locking. The `OptimizingCompiler` is currently calling `GetThunkCode()`
+ // and locking a mutex there for every `LinkerPatch` that needs a thunk to
+ // check whether we need to compile it. Using a thunk compiler interface,
+ // we could drive this from the `dex2oat` side and lock the mutex at most
+ // once per `CreateCompiledMethod()` for any number of patches.
+ virtual ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ std::string* debug_name = nullptr) = 0;
+ virtual void SetThunkCode(const linker::LinkerPatch& patch,
+ ArrayRef<const uint8_t> code,
+ const std::string& debug_name) = 0;
+
+ protected:
+ CompiledCodeStorage() {}
+ ~CompiledCodeStorage() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CompiledCodeStorage);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_DRIVER_COMPILED_CODE_STORAGE_H_
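The `GetThunkCode()`/`SetThunkCode()` contract above, and the locking concern in the TODO, amount to a mutex-protected map from patch kind to thunk code. A self-contained sketch of that pattern, with a simplified key standing in for the real `LinkerPatch` data (all names below are illustrative):

    // Sketch of the thunk-code registry behind GetThunkCode()/SetThunkCode().
    #include <cstdint>
    #include <map>
    #include <mutex>
    #include <string>
    #include <tuple>
    #include <vector>

    class ThunkRegistrySketch {
     public:
      // Simplification of the (patch type, custom value 1, custom value 2) tuple.
      using Key = std::tuple<int, uint32_t, uint32_t>;

      // Returns the thunk code for `key`, or an empty vector if it has not been
      // compiled yet. Note that this takes the lock once per queried patch, which
      // is exactly the cost the TODO above wants to reduce.
      std::vector<uint8_t> GetThunkCode(const Key& key, std::string* debug_name = nullptr) {
        std::lock_guard<std::mutex> lock(mutex_);
        auto it = thunks_.find(key);
        if (it == thunks_.end()) {
          if (debug_name != nullptr) debug_name->clear();
          return {};
        }
        if (debug_name != nullptr) *debug_name = it->second.debug_name;
        return it->second.code;
      }

      void SetThunkCode(const Key& key, std::vector<uint8_t> code, std::string debug_name) {
        std::lock_guard<std::mutex> lock(mutex_);
        // Several compiler threads may race to publish the same thunk; the first
        // insertion wins and later ones are ignored (emplace() semantics).
        thunks_.emplace(key, Value{std::move(code), std::move(debug_name)});
      }

     private:
      struct Value {
        std::vector<uint8_t> code;
        std::string debug_name;
      };
      std::mutex mutex_;
      std::map<Key, Value> thunks_;
    };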
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
deleted file mode 100644
index 4857ec0931..0000000000
--- a/compiler/driver/compiled_method_storage.cc
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <ostream>
-
-#include "compiled_method_storage.h"
-
-#include <android-base/logging.h>
-
-#include "base/data_hash.h"
-#include "base/utils.h"
-#include "compiled_method.h"
-#include "linker/linker_patch.h"
-#include "thread-current-inl.h"
-#include "utils/dedupe_set-inl.h"
-#include "utils/swap_space.h"
-
-namespace art {
-
-namespace { // anonymous namespace
-
-template <typename T>
-const LengthPrefixedArray<T>* CopyArray(SwapSpace* swap_space, const ArrayRef<const T>& array) {
- DCHECK(!array.empty());
- SwapAllocator<uint8_t> allocator(swap_space);
- void* storage = allocator.allocate(LengthPrefixedArray<T>::ComputeSize(array.size()));
- LengthPrefixedArray<T>* array_copy = new(storage) LengthPrefixedArray<T>(array.size());
- std::copy(array.begin(), array.end(), array_copy->begin());
- return array_copy;
-}
-
-template <typename T>
-void ReleaseArray(SwapSpace* swap_space, const LengthPrefixedArray<T>* array) {
- SwapAllocator<uint8_t> allocator(swap_space);
- size_t size = LengthPrefixedArray<T>::ComputeSize(array->size());
- array->~LengthPrefixedArray<T>();
- allocator.deallocate(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(array)), size);
-}
-
-} // anonymous namespace
-
-template <typename T, typename DedupeSetType>
-inline const LengthPrefixedArray<T>* CompiledMethodStorage::AllocateOrDeduplicateArray(
- const ArrayRef<const T>& data,
- DedupeSetType* dedupe_set) {
- if (data.empty()) {
- return nullptr;
- } else if (!DedupeEnabled()) {
- return CopyArray(swap_space_.get(), data);
- } else {
- return dedupe_set->Add(Thread::Current(), data);
- }
-}
-
-template <typename T>
-inline void CompiledMethodStorage::ReleaseArrayIfNotDeduplicated(
- const LengthPrefixedArray<T>* array) {
- if (array != nullptr && !DedupeEnabled()) {
- ReleaseArray(swap_space_.get(), array);
- }
-}
-
-template <typename ContentType>
-class CompiledMethodStorage::DedupeHashFunc {
- private:
- static constexpr bool kUseMurmur3Hash = true;
-
- public:
- size_t operator()(const ArrayRef<ContentType>& array) const {
- return DataHash()(array);
- }
-};
-
-template <typename T>
-class CompiledMethodStorage::LengthPrefixedArrayAlloc {
- public:
- explicit LengthPrefixedArrayAlloc(SwapSpace* swap_space)
- : swap_space_(swap_space) {
- }
-
- const LengthPrefixedArray<T>* Copy(const ArrayRef<const T>& array) {
- return CopyArray(swap_space_, array);
- }
-
- void Destroy(const LengthPrefixedArray<T>* array) {
- ReleaseArray(swap_space_, array);
- }
-
- private:
- SwapSpace* const swap_space_;
-};
-
-class CompiledMethodStorage::ThunkMapKey {
- public:
- ThunkMapKey(linker::LinkerPatch::Type type, uint32_t custom_value1, uint32_t custom_value2)
- : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) {}
-
- bool operator<(const ThunkMapKey& other) const {
- if (custom_value1_ != other.custom_value1_) {
- return custom_value1_ < other.custom_value1_;
- }
- if (custom_value2_ != other.custom_value2_) {
- return custom_value2_ < other.custom_value2_;
- }
- return type_ < other.type_;
- }
-
- private:
- linker::LinkerPatch::Type type_;
- uint32_t custom_value1_;
- uint32_t custom_value2_;
-};
-
-class CompiledMethodStorage::ThunkMapValue {
- public:
- ThunkMapValue(std::vector<uint8_t, SwapAllocator<uint8_t>>&& code,
- const std::string& debug_name)
- : code_(std::move(code)), debug_name_(debug_name) {}
-
- ArrayRef<const uint8_t> GetCode() const {
- return ArrayRef<const uint8_t>(code_);
- }
-
- const std::string& GetDebugName() const {
- return debug_name_;
- }
-
- private:
- std::vector<uint8_t, SwapAllocator<uint8_t>> code_;
- std::string debug_name_;
-};
-
-CompiledMethodStorage::CompiledMethodStorage(int swap_fd)
- : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
- dedupe_enabled_(true),
- dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
- dedupe_vmap_table_("dedupe vmap table",
- LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
- dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
- dedupe_linker_patches_("dedupe cfi info",
- LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())),
- thunk_map_lock_("thunk_map_lock"),
- thunk_map_(std::less<ThunkMapKey>(), SwapAllocator<ThunkMapValueType>(swap_space_.get())) {
-}
-
-CompiledMethodStorage::~CompiledMethodStorage() {
- // All done by member destructors.
-}
-
-void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const {
- if (swap_space_.get() != nullptr) {
- const size_t swap_size = swap_space_->GetSize();
- os << " swap=" << PrettySize(swap_size) << " (" << swap_size << "B)";
- }
- if (extended) {
- Thread* self = Thread::Current();
- os << "\nCode dedupe: " << dedupe_code_.DumpStats(self);
- os << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats(self);
- os << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats(self);
- }
-}
-
-const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCode(
- const ArrayRef<const uint8_t>& code) {
- return AllocateOrDeduplicateArray(code, &dedupe_code_);
-}
-
-void CompiledMethodStorage::ReleaseCode(const LengthPrefixedArray<uint8_t>* code) {
- ReleaseArrayIfNotDeduplicated(code);
-}
-
-size_t CompiledMethodStorage::UniqueCodeEntries() const {
- DCHECK(DedupeEnabled());
- return dedupe_code_.Size(Thread::Current());
-}
-
-const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
- const ArrayRef<const uint8_t>& table) {
- return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_);
-}
-
-void CompiledMethodStorage::ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table) {
- ReleaseArrayIfNotDeduplicated(table);
-}
-
-size_t CompiledMethodStorage::UniqueVMapTableEntries() const {
- DCHECK(DedupeEnabled());
- return dedupe_vmap_table_.Size(Thread::Current());
-}
-
-const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo(
- const ArrayRef<const uint8_t>& cfi_info) {
- return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_);
-}
-
-void CompiledMethodStorage::ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info) {
- ReleaseArrayIfNotDeduplicated(cfi_info);
-}
-
-size_t CompiledMethodStorage::UniqueCFIInfoEntries() const {
- DCHECK(DedupeEnabled());
- return dedupe_cfi_info_.Size(Thread::Current());
-}
-
-const LengthPrefixedArray<linker::LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
- const ArrayRef<const linker::LinkerPatch>& linker_patches) {
- return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_);
-}
-
-void CompiledMethodStorage::ReleaseLinkerPatches(
- const LengthPrefixedArray<linker::LinkerPatch>* linker_patches) {
- ReleaseArrayIfNotDeduplicated(linker_patches);
-}
-
-size_t CompiledMethodStorage::UniqueLinkerPatchesEntries() const {
- DCHECK(DedupeEnabled());
- return dedupe_linker_patches_.Size(Thread::Current());
-}
-
-CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey(
- const linker::LinkerPatch& linker_patch) {
- uint32_t custom_value1 = 0u;
- uint32_t custom_value2 = 0u;
- switch (linker_patch.GetType()) {
- case linker::LinkerPatch::Type::kCallEntrypoint:
- custom_value1 = linker_patch.EntrypointOffset();
- break;
- case linker::LinkerPatch::Type::kBakerReadBarrierBranch:
- custom_value1 = linker_patch.GetBakerCustomValue1();
- custom_value2 = linker_patch.GetBakerCustomValue2();
- break;
- case linker::LinkerPatch::Type::kCallRelative:
- // No custom values.
- break;
- default:
- LOG(FATAL) << "Unexpected patch type: " << linker_patch.GetType();
- UNREACHABLE();
- }
- return ThunkMapKey(linker_patch.GetType(), custom_value1, custom_value2);
-}
-
-ArrayRef<const uint8_t> CompiledMethodStorage::GetThunkCode(const linker::LinkerPatch& linker_patch,
- /*out*/ std::string* debug_name) {
- ThunkMapKey key = GetThunkMapKey(linker_patch);
- MutexLock lock(Thread::Current(), thunk_map_lock_);
- auto it = thunk_map_.find(key);
- if (it != thunk_map_.end()) {
- const ThunkMapValue& value = it->second;
- if (debug_name != nullptr) {
- *debug_name = value.GetDebugName();
- }
- return value.GetCode();
- } else {
- if (debug_name != nullptr) {
- *debug_name = std::string();
- }
- return ArrayRef<const uint8_t>();
- }
-}
-
-void CompiledMethodStorage::SetThunkCode(const linker::LinkerPatch& linker_patch,
- ArrayRef<const uint8_t> code,
- const std::string& debug_name) {
- DCHECK(!code.empty());
- ThunkMapKey key = GetThunkMapKey(linker_patch);
- std::vector<uint8_t, SwapAllocator<uint8_t>> code_copy(
- code.begin(), code.end(), SwapAllocator<uint8_t>(swap_space_.get()));
- ThunkMapValue value(std::move(code_copy), debug_name);
- MutexLock lock(Thread::Current(), thunk_map_lock_);
- // Note: Multiple threads can try and compile the same thunk, so this may not create a new entry.
- thunk_map_.emplace(key, std::move(value));
-}
-
-} // namespace art
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
deleted file mode 100644
index f9f34017eb..0000000000
--- a/compiler/driver/compiled_method_storage.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
-#define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
-
-#include <iosfwd>
-#include <map>
-#include <memory>
-
-#include "base/array_ref.h"
-#include "base/length_prefixed_array.h"
-#include "base/macros.h"
-#include "utils/dedupe_set.h"
-#include "utils/swap_space.h"
-
-namespace art {
-
-namespace linker {
-class LinkerPatch;
-} // namespace linker
-
-class CompiledMethodStorage {
- public:
- explicit CompiledMethodStorage(int swap_fd);
- ~CompiledMethodStorage();
-
- void DumpMemoryUsage(std::ostream& os, bool extended) const;
-
- void SetDedupeEnabled(bool dedupe_enabled) {
- dedupe_enabled_ = dedupe_enabled;
- }
- bool DedupeEnabled() const {
- return dedupe_enabled_;
- }
-
- SwapAllocator<void> GetSwapSpaceAllocator() {
- return SwapAllocator<void>(swap_space_.get());
- }
-
- const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
- void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
- size_t UniqueCodeEntries() const;
-
- const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
- void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
- size_t UniqueVMapTableEntries() const;
-
- const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
- void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
- size_t UniqueCFIInfoEntries() const;
-
- const LengthPrefixedArray<linker::LinkerPatch>* DeduplicateLinkerPatches(
- const ArrayRef<const linker::LinkerPatch>& linker_patches);
- void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches);
- size_t UniqueLinkerPatchesEntries() const;
-
- // Returns the code associated with the given patch.
- // If the code has not been set, returns empty data.
- // If `debug_name` is not null, stores the associated debug name in `*debug_name`.
- ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& linker_patch,
- /*out*/ std::string* debug_name = nullptr);
-
- // Sets the code and debug name associated with the given patch.
- void SetThunkCode(const linker::LinkerPatch& linker_patch,
- ArrayRef<const uint8_t> code,
- const std::string& debug_name);
-
- private:
- class ThunkMapKey;
- class ThunkMapValue;
- using ThunkMapValueType = std::pair<const ThunkMapKey, ThunkMapValue>;
- using ThunkMap = std::map<ThunkMapKey,
- ThunkMapValue,
- std::less<ThunkMapKey>,
- SwapAllocator<ThunkMapValueType>>;
- static_assert(std::is_same<ThunkMapValueType, ThunkMap::value_type>::value, "Value type check.");
-
- static ThunkMapKey GetThunkMapKey(const linker::LinkerPatch& linker_patch);
-
- template <typename T, typename DedupeSetType>
- const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data,
- DedupeSetType* dedupe_set);
-
- template <typename T>
- void ReleaseArrayIfNotDeduplicated(const LengthPrefixedArray<T>* array);
-
- // DeDuplication data structures.
- template <typename ContentType>
- class DedupeHashFunc;
-
- template <typename T>
- class LengthPrefixedArrayAlloc;
-
- template <typename T>
- using ArrayDedupeSet = DedupeSet<ArrayRef<const T>,
- LengthPrefixedArray<T>,
- LengthPrefixedArrayAlloc<T>,
- size_t,
- DedupeHashFunc<const T>,
- 4>;
-
- // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first
- // as other fields rely on this.
- std::unique_ptr<SwapSpace> swap_space_;
-
- bool dedupe_enabled_;
-
- ArrayDedupeSet<uint8_t> dedupe_code_;
- ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
- ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
- ArrayDedupeSet<linker::LinkerPatch> dedupe_linker_patches_;
-
- Mutex thunk_map_lock_;
- ThunkMap thunk_map_ GUARDED_BY(thunk_map_lock_);
-
- DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage);
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
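For context on the removal: `CompiledMethodStorage` existed to share identical byte arrays (code, vmap tables, CFI info, linker patches) between compiled methods. A stripped-down sketch of that deduplication idea, without the `DedupeSet` and swap-space machinery of the deleted class (names here are illustrative):

    // Toy deduplicator: identical byte arrays are stored once and callers get a
    // pointer to the shared canonical copy -- the service the removed class
    // provided (there with LengthPrefixedArray storage and an optional
    // file-backed swap space).
    #include <cstdint>
    #include <memory>
    #include <unordered_map>
    #include <vector>

    class ArrayDeduperSketch {
     public:
      const std::vector<uint8_t>* Add(const std::vector<uint8_t>& data) {
        uint64_t hash = Hash(data);
        auto range = canonical_.equal_range(hash);
        for (auto it = range.first; it != range.second; ++it) {
          if (*it->second == data) {
            return it->second.get();  // identical contents: share the existing copy
          }
        }
        auto copy = std::make_unique<std::vector<uint8_t>>(data);
        const std::vector<uint8_t>* result = copy.get();
        canonical_.emplace(hash, std::move(copy));
        return result;
      }

     private:
      static uint64_t Hash(const std::vector<uint8_t>& data) {
        uint64_t h = 0xcbf29ce484222325ull;  // FNV-1a; the real code used DataHash
        for (uint8_t b : data) {
          h = (h ^ b) * 0x100000001b3ull;
        }
        return h;
      }
      std::unordered_multimap<uint64_t, std::unique_ptr<std::vector<uint8_t>>> canonical_;
    };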
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
deleted file mode 100644
index 05eacd848d..0000000000
--- a/compiler/driver/compiled_method_storage_test.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiled_method_storage.h"
-
-#include <gtest/gtest.h>
-
-#include "compiled_method-inl.h"
-
-namespace art {
-
-TEST(CompiledMethodStorage, Deduplicate) {
- CompiledMethodStorage storage(/* swap_fd= */ -1);
-
- ASSERT_TRUE(storage.DedupeEnabled()); // The default.
-
- const uint8_t raw_code1[] = { 1u, 2u, 3u };
- const uint8_t raw_code2[] = { 4u, 3u, 2u, 1u };
- ArrayRef<const uint8_t> code[] = {
- ArrayRef<const uint8_t>(raw_code1),
- ArrayRef<const uint8_t>(raw_code2),
- };
- const uint8_t raw_vmap_table1[] = { 2, 4, 6 };
- const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 };
- ArrayRef<const uint8_t> vmap_table[] = {
- ArrayRef<const uint8_t>(raw_vmap_table1),
- ArrayRef<const uint8_t>(raw_vmap_table2),
- };
- const uint8_t raw_cfi_info1[] = { 1, 3, 5 };
- const uint8_t raw_cfi_info2[] = { 8, 6, 4, 2 };
- ArrayRef<const uint8_t> cfi_info[] = {
- ArrayRef<const uint8_t>(raw_cfi_info1),
- ArrayRef<const uint8_t>(raw_cfi_info2),
- };
- const linker::LinkerPatch raw_patches1[] = {
- linker::LinkerPatch::IntrinsicReferencePatch(0u, 0u, 0u),
- linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u),
- };
- const linker::LinkerPatch raw_patches2[] = {
- linker::LinkerPatch::IntrinsicReferencePatch(0u, 0u, 0u),
- linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u),
- };
- ArrayRef<const linker::LinkerPatch> patches[] = {
- ArrayRef<const linker::LinkerPatch>(raw_patches1),
- ArrayRef<const linker::LinkerPatch>(raw_patches2),
- };
-
- std::vector<CompiledMethod*> compiled_methods;
- compiled_methods.reserve(1u << 4);
- for (auto&& c : code) {
- for (auto&& v : vmap_table) {
- for (auto&& f : cfi_info) {
- for (auto&& p : patches) {
- compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod(
- &storage, InstructionSet::kNone, c, v, f, p));
- }
- }
- }
- }
- constexpr size_t code_bit = 1u << 3;
- constexpr size_t vmap_table_bit = 1u << 2;
- constexpr size_t cfi_info_bit = 1u << 1;
- constexpr size_t patches_bit = 1u << 0;
- CHECK_EQ(compiled_methods.size(), 1u << 4);
- for (size_t i = 0; i != compiled_methods.size(); ++i) {
- for (size_t j = 0; j != compiled_methods.size(); ++j) {
- CompiledMethod* lhs = compiled_methods[i];
- CompiledMethod* rhs = compiled_methods[j];
- bool same_code = ((i ^ j) & code_bit) == 0u;
- bool same_vmap_table = ((i ^ j) & vmap_table_bit) == 0u;
- bool same_cfi_info = ((i ^ j) & cfi_info_bit) == 0u;
- bool same_patches = ((i ^ j) & patches_bit) == 0u;
- ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data())
- << i << " " << j;
- ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data())
- << i << " " << j;
- ASSERT_EQ(same_cfi_info, lhs->GetCFIInfo().data() == rhs->GetCFIInfo().data())
- << i << " " << j;
- ASSERT_EQ(same_patches, lhs->GetPatches().data() == rhs->GetPatches().data())
- << i << " " << j;
- }
- }
- for (CompiledMethod* method : compiled_methods) {
- CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&storage, method);
- }
-}
-
-} // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 51cd999b6d..603596f3bc 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -23,6 +23,7 @@
#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
+#include "art_method-inl.h"
#include "base/runtime_debug.h"
#include "base/string_view_cpp20.h"
#include "base/variant_map.h"
@@ -30,12 +31,11 @@
#include "cmdline_parser.h"
#include "compiler_options_map-inl.h"
#include "dex/dex_file-inl.h"
-#include "dex/verification_results.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
#include "simple_compiler_options_map.h"
-namespace art {
+namespace art HIDDEN {
CompilerOptions::CompilerOptions()
: compiler_filter_(CompilerFilter::kDefaultCompilerFilter),
@@ -48,7 +48,6 @@ CompilerOptions::CompilerOptions()
no_inline_from_(),
dex_files_for_oat_file_(),
image_classes_(),
- verification_results_(nullptr),
compiler_type_(CompilerType::kAotCompiler),
image_type_(ImageType::kNone),
multi_image_(false),
@@ -146,14 +145,34 @@ bool CompilerOptions::ParseCompilerOptions(const std::vector<std::string>& optio
bool CompilerOptions::IsImageClass(const char* descriptor) const {
// Historical note: We used to hold the set indirectly and there was a distinction between an
- // empty set and a null, null meaning to include all classes. However, the distiction has been
+ // empty set and a null, null meaning to include all classes. However, the distinction has been
// removed; if we don't have a profile, we treat it as an empty set of classes. b/77340429
return image_classes_.find(std::string_view(descriptor)) != image_classes_.end();
}
-const VerificationResults* CompilerOptions::GetVerificationResults() const {
- DCHECK(Runtime::Current()->IsAotCompiler());
- return verification_results_;
+bool CompilerOptions::IsPreloadedClass(const char* pretty_descriptor) const {
+ return preloaded_classes_.find(std::string_view(pretty_descriptor)) != preloaded_classes_.end();
+}
+
+bool CompilerOptions::ShouldCompileWithClinitCheck(ArtMethod* method) const {
+ if (method != nullptr &&
+ Runtime::Current()->IsAotCompiler() &&
+ method->IsStatic() &&
+ !method->IsConstructor() &&
+ // Compiled code for native methods never does a clinit check, so we may put the resolution
+ // trampoline for native methods. This means that it's possible post zygote fork for the
+ // entry to be dirtied. We could resolve this by either:
+ // - Make these methods use the generic JNI entrypoint, but that's not
+ // desirable for a method that is in the profile.
+ // - Ensure the declaring class of such native methods are always in the
+ // preloaded-classes list.
+ // - Emit the clinit check in the compiled code of native methods.
+ !method->IsNative()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> cls = method->GetDeclaringClass<kWithoutReadBarrier>();
+ return cls->IsInBootImageAndNotInPreloadedClasses();
+ }
+ return false;
}
} // namespace art
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 1bffdb11ed..c8a41ce24b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -30,7 +30,7 @@
#include "base/utils.h"
#include "optimizing/register_allocator.h"
-namespace art {
+namespace art HIDDEN {
namespace jit {
class JitCompiler;
@@ -44,11 +44,11 @@ namespace linker {
class Arm64RelativePatcherTest;
} // namespace linker
+class ArtMethod;
class DexFile;
enum class InstructionSet;
class InstructionSetFeatures;
class ProfileCompilationInfo;
-class VerificationResults;
// Enum for CheckProfileMethodsCompiled. Outside CompilerOptions so it can be forward-declared.
enum class ProfileMethodsCheck : uint8_t {
@@ -83,8 +83,8 @@ class CompilerOptions final {
kAppImage, // Creating app image.
};
- CompilerOptions();
- ~CompilerOptions();
+ EXPORT CompilerOptions();
+ EXPORT ~CompilerOptions();
CompilerFilter::Filter GetCompilerFilter() const {
return compiler_filter_;
@@ -114,12 +114,10 @@ class CompilerOptions final {
return compiler_filter_ == CompilerFilter::kAssumeVerified;
}
- bool VerifyAtRuntime() const {
- return compiler_filter_ == CompilerFilter::kExtract;
- }
-
bool IsAnyCompilationEnabled() const {
- return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_);
+ return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_) &&
+ // TODO(riscv64): remove this when we have compiler support for RISC-V
+ GetInstructionSet() != InstructionSet::kRiscv64;
}
size_t GetHugeMethodThreshold() const {
@@ -298,9 +296,11 @@ class CompilerOptions final {
return image_classes_;
}
- bool IsImageClass(const char* descriptor) const;
+ EXPORT bool IsImageClass(const char* descriptor) const;
- const VerificationResults* GetVerificationResults() const;
+ // Returns whether the given `pretty_descriptor` is in the list of preloaded
+ // classes. `pretty_descriptor` should be the result of calling `PrettyDescriptor`.
+ EXPORT bool IsPreloadedClass(const char* pretty_descriptor) const;
bool ParseCompilerOptions(const std::vector<std::string>& options,
bool ignore_unrecognized,
@@ -383,9 +383,15 @@ class CompilerOptions final {
return ContainsElement(GetDexFilesForOatFile(), dex_file);
}
+ // If this is a static non-constructor method in the boot classpath, and its class isn't
+ // initialized at compile-time, or won't be initialized by the zygote, add
+ // initialization checks at entry. This avoids the need for trampolines,
+ // which we would otherwise need to dirty at runtime after initialization.
+ EXPORT bool ShouldCompileWithClinitCheck(ArtMethod* method) const;
+
private:
- bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
- bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg);
+ EXPORT bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
+ EXPORT bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg);
CompilerFilter::Filter compiler_filter_;
size_t huge_method_threshold_;
@@ -408,8 +414,9 @@ class CompilerOptions final {
// Must not be empty for real boot image, only for tests pretending to compile boot image.
HashSet<std::string> image_classes_;
- // Results of AOT verification.
- const VerificationResults* verification_results_;
+ // Classes listed in the preloaded-classes file, used for boot image and
+ // boot image extension compilation.
+ HashSet<std::string> preloaded_classes_;
CompilerType compiler_type_;
ImageType image_type_;
diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h
index fcbc0f2f5c..79a59625f5 100644
--- a/compiler/driver/compiler_options_map-inl.h
+++ b/compiler/driver/compiler_options_map-inl.h
@@ -29,7 +29,7 @@
#include "cmdline_parser.h"
#include "compiler_options.h"
-namespace art {
+namespace art HIDDEN {
template <>
struct CmdlineType<CompilerFilter::Filter> : CmdlineTypeParser<CompilerFilter::Filter> {
@@ -118,6 +118,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string
template <typename Map, typename Builder>
inline void AddCompilerOptionsArgumentParserOptions(Builder& b) {
+ // clang-format off
b.
Define("--compiler-filter=_")
.template WithType<CompilerFilter::Filter>()
@@ -256,6 +257,7 @@ inline void AddCompilerOptionsArgumentParserOptions(Builder& b) {
.template WithType<unsigned int>()
.WithHelp("Maximum solid block size for compressed images.")
.IntoKey(Map::MaxImageBlockSize);
+ // clang-format on
}
#pragma GCC diagnostic pop
diff --git a/compiler/driver/compiler_options_map.h b/compiler/driver/compiler_options_map.h
index 7e2f8466e0..b2dd57d00e 100644
--- a/compiler/driver/compiler_options_map.h
+++ b/compiler/driver/compiler_options_map.h
@@ -21,10 +21,11 @@
#include <vector>
#include "base/compiler_filter.h"
+#include "base/macros.h"
#include "base/variant_map.h"
#include "cmdline_types.h"
-namespace art {
+namespace art HIDDEN {
enum class ProfileMethodsCheck : uint8_t;
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index 0d0f074917..ccebfa9c07 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -25,7 +25,7 @@
#include "mirror/dex_cache.h"
#include "scoped_thread_state_change-inl.h"
-namespace art {
+namespace art HIDDEN {
DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader,
ClassLinker* class_linker,
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index def90fa4e1..d595c0a4b2 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -20,11 +20,12 @@
#include <stdint.h>
#include "base/arena_object.h"
+#include "base/macros.h"
#include "dex/code_item_accessors.h"
#include "dex/dex_file.h"
#include "handle.h"
-namespace art {
+namespace art HIDDEN {
namespace mirror {
class Class;
class ClassLoader;
diff --git a/compiler/driver/simple_compiler_options_map.h b/compiler/driver/simple_compiler_options_map.h
index e7a51a4995..6663c0c509 100644
--- a/compiler/driver/simple_compiler_options_map.h
+++ b/compiler/driver/simple_compiler_options_map.h
@@ -23,9 +23,10 @@
#include <memory>
#include "compiler_options_map-inl.h"
+#include "base/macros.h"
#include "base/variant_map.h"
-namespace art {
+namespace art HIDDEN {
template <typename TValue>
struct SimpleParseArgumentMapKey : VariantMapKey<TValue> {
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index 495398b4b3..82c4998217 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <android-base/test_utils.h>
+
#include <memory>
#include <type_traits>
@@ -22,6 +24,7 @@
#include "base/callee_save_type.h"
#include "base/enums.h"
#include "base/leb128.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "class_linker.h"
#include "common_runtime_test.h"
@@ -42,7 +45,7 @@
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
class ExceptionTest : public CommonRuntimeTest {
protected:
@@ -78,7 +81,12 @@ class ExceptionTest : public CommonRuntimeTest {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stack_maps(&allocator, kRuntimeISA);
- stack_maps.BeginMethod(4 * sizeof(void*), 0u, 0u, 0u);
+ stack_maps.BeginMethod(/* frame_size_in_bytes= */ 4 * sizeof(void*),
+ /* core_spill_mask= */ 0u,
+ /* fp_spill_mask= */ 0u,
+ /* num_dex_registers= */ 0u,
+ /* baseline= */ false,
+ /* debuggable= */ false);
stack_maps.BeginStackMapEntry(kDexPc, native_pc_offset);
stack_maps.EndStackMapEntry();
stack_maps.EndMethod(code_size);
@@ -86,7 +94,7 @@ class ExceptionTest : public CommonRuntimeTest {
const size_t stack_maps_size = stack_map.size();
const size_t header_size = sizeof(OatQuickMethodHeader);
- const size_t code_alignment = GetInstructionSetAlignment(kRuntimeISA);
+ const size_t code_alignment = GetInstructionSetCodeAlignment(kRuntimeISA);
fake_header_code_and_maps_.resize(stack_maps_size + header_size + code_size + code_alignment);
// NB: The start of the vector might not have been allocated the desired alignment.
@@ -187,15 +195,24 @@ TEST_F(ExceptionTest, StackTraceElement) {
fake_stack.push_back(0);
}
- fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
- method_g_, kDexPc, /* is_for_catch_handler= */ false)); // return pc
+ OatQuickMethodHeader* header = OatQuickMethodHeader::FromEntryPoint(
+ method_g_->GetEntryPointFromQuickCompiledCode());
+ // Untag native pc when running with hwasan since the pcs on the stack aren't tagged and we use
+ // this to create a fake stack. See OatQuickMethodHeader::Contains where we untag code pointers
+ // before comparing them with the PC from the stack.
+ uintptr_t native_pc = header->ToNativeQuickPc(method_g_, kDexPc);
+ if (running_with_hwasan()) {
+ // TODO(228989263): Use HWASanUntag once we have a hwasan target for tests too. HWASanUntag
+ // uses static checks which won't work if we don't have a dedicated target.
+ native_pc = (native_pc & ((1ULL << 56) - 1));
+ }
+ fake_stack.push_back(native_pc); // return pc
// Create/push fake 16byte stack frame for method g
fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
fake_stack.push_back(0);
fake_stack.push_back(0);
- fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
- method_g_, kDexPc, /* is_for_catch_handler= */ false)); // return pc
+ fake_stack.push_back(native_pc); // return pc.
// Create/push fake 16byte stack frame for method f
fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 7002636d4e..e67236769e 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -34,13 +34,17 @@
#include "jit/jit_code_cache.h"
#include "jit/jit_logger.h"
-namespace art {
+namespace art HIDDEN {
namespace jit {
JitCompiler* JitCompiler::Create() {
return new JitCompiler();
}
+void JitCompiler::SetDebuggableCompilerOption(bool value) {
+ compiler_options_->SetDebuggable(value);
+}
+
void JitCompiler::ParseCompilerOptions() {
// Special case max code units for inlining, whose default is "unset" (implicitly
// meaning no limit). Do this before parsing the actual passed options.
@@ -85,7 +89,7 @@ void JitCompiler::ParseCompilerOptions() {
if (StartsWith(option, "--instruction-set-variant=")) {
const char* str = option.c_str() + strlen("--instruction-set-variant=");
VLOG(compiler) << "JIT instruction set variant " << str;
- instruction_set_features = InstructionSetFeatures::FromVariant(
+ instruction_set_features = InstructionSetFeatures::FromVariantAndHwcap(
instruction_set, str, &error_msg);
if (instruction_set_features == nullptr) {
LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
@@ -121,7 +125,7 @@ void JitCompiler::ParseCompilerOptions() {
}
}
-extern "C" JitCompilerInterface* jit_load() {
+EXPORT extern "C" JitCompilerInterface* jit_load() {
VLOG(jit) << "Create jit compiler";
auto* const jit_compiler = JitCompiler::Create();
CHECK(jit_compiler != nullptr);
@@ -199,6 +203,8 @@ bool JitCompiler::CompileMethod(
VLOG(jit) << "Compilation of " << method->PrettyMethod() << " took "
<< PrettyDuration(UsToNs(duration_us));
runtime->GetMetrics()->JitMethodCompileCount()->AddOne();
+ runtime->GetMetrics()->JitMethodCompileTotalTimeDelta()->Add(duration_us);
+ runtime->GetMetrics()->JitMethodCompileCountDelta()->AddOne();
}
// Trim maps to reduce memory usage.
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 8e9966db0e..5a919fb612 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -17,12 +17,13 @@
#ifndef ART_COMPILER_JIT_JIT_COMPILER_H_
#define ART_COMPILER_JIT_JIT_COMPILER_H_
+#include "base/macros.h"
#include "base/mutex.h"
#include "compilation_kind.h"
#include "jit/jit.h"
-namespace art {
+namespace art HIDDEN {
class ArtMethod;
class Compiler;
@@ -50,6 +51,8 @@ class JitCompiler : public JitCompilerInterface {
bool IsBaselineCompiler() const override;
+ void SetDebuggableCompilerOption(bool val) override;
+
bool GenerateDebugInfo() override;
void ParseCompilerOptions() override;
diff --git a/compiler/jit/jit_logger.cc b/compiler/jit/jit_logger.cc
index 6b9453f525..32845260f3 100644
--- a/compiler/jit/jit_logger.cc
+++ b/compiler/jit/jit_logger.cc
@@ -24,7 +24,7 @@
#include "jit/jit_code_cache.h"
#include "oat_file-inl.h"
-namespace art {
+namespace art HIDDEN {
namespace jit {
#ifdef ART_TARGET_ANDROID
diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h
index f4ef75a5fe..9d1f3073fa 100644
--- a/compiler/jit/jit_logger.h
+++ b/compiler/jit/jit_logger.h
@@ -19,11 +19,11 @@
#include <memory>
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/os.h"
-#include "compiled_method.h"
-namespace art {
+namespace art HIDDEN {
class ArtMethod;
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 9e3bb86fb1..70cf2d4eb0 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -20,6 +20,7 @@
#include "arch/instruction_set.h"
#include "base/arena_allocator.h"
#include "base/enums.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "cfi_test.h"
#include "gtest/gtest.h"
@@ -30,7 +31,7 @@
#include "jni/jni_cfi_test_expected.inc"
-namespace art {
+namespace art HIDDEN {
// Run the tests only on host.
#ifndef ART_TARGET_ANDROID
@@ -124,22 +125,31 @@ class JNICFITest : public CFITest {
TestImpl(InstructionSet::isa, #isa, expected_asm, expected_cfi); \
}
+// We can't use compile-time macros for read-barrier as the introduction
+// of userfaultfd-GC has made it a runtime choice.
+#define TEST_ISA_ONLY_CC(isa) \
+ TEST_F(JNICFITest, isa) { \
+ if (kUseBakerReadBarrier && gUseReadBarrier) { \
+ std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(InstructionSet::isa, #isa, expected_asm, expected_cfi); \
+ } \
+ }
+
#ifdef ART_ENABLE_CODEGEN_arm
// Run the tests for ARM only with Baker read barriers, as the
// expected generated code contains a Marking Register refresh
// instruction.
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
-TEST_ISA(kThumb2)
-#endif
+TEST_ISA_ONLY_CC(kThumb2)
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
// Run the tests for ARM64 only with Baker read barriers, as the
// expected generated code contains a Marking Register refresh
// instruction.
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
-TEST_ISA(kArm64)
-#endif
+TEST_ISA_ONLY_CC(kArm64)
#endif
#ifdef ART_ENABLE_CODEGEN_x86
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 0a1f017828..397db251b8 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -22,11 +22,13 @@
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/casts.h"
+#include "base/macros.h"
#include "base/mem_map.h"
#include "class_linker.h"
#include "common_compiler_test.h"
#include "compiler.h"
#include "dex/dex_file.h"
+#include "driver/compiler_options.h"
#include "entrypoints/entrypoint_utils-inl.h"
#include "gtest/gtest.h"
#include "indirect_reference_table.h"
@@ -43,7 +45,7 @@
#include "oat_quick_method_header.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread.h"
+#include "thread-inl.h"
extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar(JNIEnv*, jobject, jint count) {
return count + 1;
@@ -71,7 +73,7 @@ extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_sbar_1Critical(jint count)
// TODO: In the Baker read barrier configuration, add checks to ensure
// the Marking Register's value is correct.
-namespace art {
+namespace art HIDDEN {
enum class JniKind {
kNormal, // Regular kind of un-annotated natives.
@@ -236,13 +238,14 @@ class JniCompilerTest : public CommonCompilerTest {
bool direct,
const char* method_name,
const char* method_sig) {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<2> hs(soa.Self());
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ StackHandleScope<2> hs(self);
Handle<mirror::ClassLoader> loader(
hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
// Compile the native method before starting the runtime
Handle<mirror::Class> c =
- hs.NewHandle(class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader));
+ hs.NewHandle(class_linker_->FindClass(self, "LMyClassNatives;", loader));
const auto pointer_size = class_linker_->GetImagePointerSize();
ArtMethod* method = c->FindClassMethod(method_name, method_sig, pointer_size);
ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig;
@@ -251,8 +254,11 @@ class JniCompilerTest : public CommonCompilerTest {
// Class initialization could replace the entrypoint, so force
// the initialization before we set up the entrypoint below.
class_linker_->EnsureInitialized(
- soa.Self(), c, /*can_init_fields=*/ true, /*can_init_parents=*/ true);
- class_linker_->MakeInitializedClassesVisiblyInitialized(soa.Self(), /*wait=*/ true);
+ self, c, /*can_init_fields=*/ true, /*can_init_parents=*/ true);
+ {
+ ScopedThreadSuspension sts(self, ThreadState::kNative);
+ class_linker_->MakeInitializedClassesVisiblyInitialized(self, /*wait=*/ true);
+ }
}
if (check_generic_jni_) {
method->SetEntryPointFromQuickCompiledCode(class_linker_->GetRuntimeQuickGenericJniStub());
@@ -402,7 +408,7 @@ jobject JniCompilerTest::jobj_;
jobject JniCompilerTest::class_loader_;
void JniCompilerTest::AssertCallerObjectLocked(JNIEnv* env) {
- Thread* self = down_cast<JNIEnvExt*>(env)->GetSelf();
+ Thread* self = Thread::ForEnv(env);
CHECK_EQ(self, Thread::Current());
ScopedObjectAccess soa(self);
ArtMethod** caller_frame = self->GetManagedStack()->GetTopQuickFrame();
@@ -414,7 +420,7 @@ void JniCompilerTest::AssertCallerObjectLocked(JNIEnv* env) {
CHECK(!caller->IsCriticalNative());
CHECK(caller->IsSynchronized());
ObjPtr<mirror::Object> lock;
- if (self->GetManagedStack()->GetTopQuickFrameTag()) {
+ if (self->GetManagedStack()->GetTopQuickFrameGenericJniTag()) {
// Generic JNI.
lock = GetGenericJniSynchronizationObject(self, caller);
} else if (caller->IsStatic()) {
@@ -845,6 +851,7 @@ jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject, jlong x, jlon
return x | y;
}
+EXPORT // Defined in `libart.so`.
void InitEntryPoints(JniEntryPoints* jpoints,
QuickEntryPoints* qpoints,
bool monitor_jni_entry_exit);
@@ -1307,7 +1314,7 @@ void JniCompilerTest::ExceptionHandlingImpl() {
CompileForTestWithCurrentJni(class_loader_, false, "synchronizedThrowException", "()V");
}
}
- // Start runtime to avoid re-initialization in SetupForTest.
+ // Start runtime to avoid re-initialization in SetUpForTest.
Thread::Current()->TransitionFromSuspendedToRunnable();
bool started = runtime_->Start();
CHECK(started);
@@ -1547,6 +1554,10 @@ jobject Java_MyClassNatives_staticMethodThatShouldReturnClass(JNIEnv* env, jclas
}
void JniCompilerTest::UpcallReturnTypeChecking_InstanceImpl() {
+ // Set debuggable so that the JNI compiler does not emit a fast-path that would skip the
+ // runtime call where we do these checks. Note that while normal gtests use the debug build
+ // which disables the fast path, `art_standalone_compiler_tests` run in the release build.
+ compiler_options_->SetDebuggable(true);
SetUpForTest(false, "instanceMethodThatShouldReturnClass", "()Ljava/lang/Class;",
CURRENT_JNI_WRAPPER(Java_MyClassNatives_instanceMethodThatShouldReturnClass));
@@ -1574,6 +1585,10 @@ void JniCompilerTest::UpcallReturnTypeChecking_InstanceImpl() {
JNI_TEST(UpcallReturnTypeChecking_Instance)
void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() {
+ // Set debuggable so that the JNI compiler does not emit a fast-path that would skip the
+ // runtime call where we do these checks. Note that while normal gtests use the debug build
+ // which disables the fast path, `art_standalone_compiler_tests` run in the release build.
+ compiler_options_->SetDebuggable(true);
SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;",
CURRENT_JNI_WRAPPER(Java_MyClassNatives_staticMethodThatShouldReturnClass));
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index c1afdb8238..d81ca77b74 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -23,7 +23,7 @@
#include "base/macros.h"
#include "utils/arm/managed_register_arm.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
//
@@ -199,6 +199,10 @@ ManagedRegister ArmManagedRuntimeCallingConvention::MethodRegister() {
return ArmManagedRegister::FromCoreRegister(R0);
}
+ManagedRegister ArmManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return ArmManagedRegister::FromCoreRegister(R2);
+}
+
void ArmManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
ManagedRuntimeCallingConvention::ResetIterator(displacement);
gpr_index_ = 1u; // Skip r0 for ArtMethod*
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 4526d9e759..3a09d4eaad 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -18,9 +18,10 @@
#define ART_COMPILER_JNI_QUICK_ARM_CALLING_CONVENTION_ARM_H_
#include "base/enums.h"
+#include "base/macros.h"
#include "jni/quick/calling_convention.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class ArmManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention {
@@ -39,6 +40,7 @@ class ArmManagedRuntimeCallingConvention final : public ManagedRuntimeCallingCon
void ResetIterator(FrameOffset displacement) override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
void Next() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index ec77db3dcb..e716502911 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -22,7 +22,7 @@
#include "arch/instruction_set.h"
#include "utils/arm64/managed_register_arm64.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
static constexpr ManagedRegister kXArgumentRegisters[] = {
@@ -174,6 +174,10 @@ ManagedRegister Arm64ManagedRuntimeCallingConvention::MethodRegister() {
return Arm64ManagedRegister::FromXRegister(X0);
}
+ManagedRegister Arm64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return Arm64ManagedRegister::FromXRegister(X4);
+}
+
bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
if (IsCurrentParamAFloatOrDouble()) {
return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 176271e3dc..f29eb15fa8 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -18,9 +18,10 @@
#define ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
#include "base/enums.h"
+#include "base/macros.h"
#include "jni/quick/calling_convention.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
class Arm64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention {
@@ -35,6 +36,7 @@ class Arm64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingC
ManagedRegister ReturnRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
ManagedRegister CurrentParamRegister() override;
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index eb4d3724ee..2b9da6ba1a 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -37,7 +37,7 @@
#include "jni/quick/x86_64/calling_convention_x86_64.h"
#endif
-namespace art {
+namespace art HIDDEN {
// Managed runtime calling convention
@@ -74,6 +74,10 @@ std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention
is_static, is_synchronized, shorty));
#endif
default:
+ UNUSED(allocator);
+ UNUSED(is_static);
+ UNUSED(is_synchronized);
+ UNUSED(shorty);
LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
UNREACHABLE();
}
@@ -165,6 +169,12 @@ std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocato
is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
#endif
default:
+ UNUSED(allocator);
+ UNUSED(is_static);
+ UNUSED(is_synchronized);
+ UNUSED(is_fast_native);
+ UNUSED(is_critical_native);
+ UNUSED(shorty);
LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
UNREACHABLE();
}
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index e2f3bfb78c..0187b14256 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -20,11 +20,12 @@
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/enums.h"
+#include "base/macros.h"
#include "dex/primitive.h"
#include "thread.h"
#include "utils/managed_register.h"
-namespace art {
+namespace art HIDDEN {
enum class InstructionSet;
@@ -244,6 +245,11 @@ class ManagedRuntimeCallingConvention : public CallingConvention {
// Register that holds the incoming method argument
virtual ManagedRegister MethodRegister() = 0;
+ // Register used to pass the frame size for the method exit hook call. This should not
+ // be the same as the return register, since the method exit hook also expects the
+ // return value in the return register.
+ virtual ManagedRegister ArgumentRegisterForMethodExitHook() = 0;
+
// Iterator interface
bool HasNext();
virtual void Next();
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 6cb50211e1..c60d97467e 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -36,7 +36,9 @@
#include "dex/dex_file-inl.h"
#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "instrumentation.h"
#include "jni/jni_env_ext.h"
+#include "runtime.h"
#include "thread.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/arm64/managed_register_arm64.h"
@@ -47,7 +49,7 @@
#define __ jni_asm->
-namespace art {
+namespace art HIDDEN {
constexpr size_t kIRTCookieSize = JniCallingConvention::SavedLocalReferenceCookieSize();
@@ -68,6 +70,12 @@ static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
ManagedRegister in_reg);
template <PointerSize kPointerSize>
+static void CallDecodeReferenceResult(JNIMacroAssembler<kPointerSize>* jni_asm,
+ JniCallingConvention* jni_conv,
+ ManagedRegister mr_return_reg,
+ size_t main_out_arg_size);
+
+template <PointerSize kPointerSize>
static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler(
ArenaAllocator* allocator, InstructionSet isa, const InstructionSetFeatures* features) {
return JNIMacroAssembler<kPointerSize>::Create(allocator, isa, features);
@@ -101,6 +109,24 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
// i.e. if the method was annotated with @CriticalNative
const bool is_critical_native = (access_flags & kAccCriticalNative) != 0u;
+ bool is_debuggable = compiler_options.GetDebuggable();
+ bool needs_entry_exit_hooks = is_debuggable && compiler_options.IsJitCompiler();
+ // We don't support JITing stubs for critical native methods in debuggable runtimes yet.
+ // TODO(mythria): Add support required for calling method entry / exit hooks from critical native
+ // methods.
+ DCHECK_IMPLIES(needs_entry_exit_hooks, !is_critical_native);
+
+ // The fast path for decoding a reference skips CheckJNI checks, so we do not inline the
+ // decoding in debug builds or for debuggable apps (both cases enable CheckJNI by default).
+ bool inline_decode_reference = !kIsDebugBuild && !is_debuggable;
+
+ // When walking the stack the top frame doesn't have a pc associated with it. We then depend on
+ // the invariant that we don't have JITed code when AOT code is available. In debuggable runtimes
+ // this invariant doesn't hold. So we tag the SP for JITed code to identify if we are executing
+ // JITed code or AOT code. Since tagging involves additional instructions we tag only in
+ // debuggable runtimes.
+ bool should_tag_sp = needs_entry_exit_hooks;
+
VLOG(jni) << "JniCompile: Method :: "
<< dex_file.PrettyMethod(method_idx, /* with signature */ true)
<< " :: access_flags = " << std::hex << access_flags << std::dec;
@@ -182,7 +208,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
// Skip this for @CriticalNative because we're not passing a `jclass` to the native method.
std::unique_ptr<JNIMacroLabel> jclass_read_barrier_slow_path;
std::unique_ptr<JNIMacroLabel> jclass_read_barrier_return;
- if (kUseReadBarrier && is_static && LIKELY(!is_critical_native)) {
+ if (gUseReadBarrier && is_static && LIKELY(!is_critical_native)) {
jclass_read_barrier_slow_path = __ CreateLabel();
jclass_read_barrier_return = __ CreateLabel();
@@ -219,7 +245,22 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
// because garbage collections are disabled within the execution of a
// @CriticalNative method.
if (LIKELY(!is_critical_native)) {
- __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
+ __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp);
+ }
+
+ // 1.5. Call any method entry hooks if required.
+ // For critical native methods, we don't JIT stubs in debuggable runtimes (see
+ // OptimizingCompiler::JitCompile).
+ // TODO(mythria): Add support to call method entry / exit hooks for critical native methods too.
+ std::unique_ptr<JNIMacroLabel> method_entry_hook_slow_path;
+ std::unique_ptr<JNIMacroLabel> method_entry_hook_return;
+ if (UNLIKELY(needs_entry_exit_hooks)) {
+ uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+ int offset = instrumentation::Instrumentation::HaveMethodEntryListenersOffset().Int32Value();
+ method_entry_hook_slow_path = __ CreateLabel();
+ method_entry_hook_return = __ CreateLabel();
+ __ TestByteAndJumpIfNotZero(address + offset, method_entry_hook_slow_path.get());
+ __ Bind(method_entry_hook_return.get());
}
// 2. Lock the object (if synchronized) and transition out of Runnable (if normal native).
@@ -442,8 +483,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ Bind(transition_to_runnable_resume.get());
}
- // 5.2. For methods that return a reference, do an early exception check so that the
- // `JniDecodeReferenceResult()` in the main path does not need to check for exceptions.
+ // 5.2. For methods that return a reference, do an exception check before decoding the reference.
std::unique_ptr<JNIMacroLabel> exception_slow_path =
LIKELY(!is_critical_native) ? __ CreateLabel() : nullptr;
if (reference_return) {
@@ -462,23 +502,23 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ Bind(suspend_check_resume.get());
}
- // 5.4 For methods with reference return, decode the `jobject` with `JniDecodeReferenceResult()`.
+ // 5.4 For methods with reference return, decode the `jobject`, either directly
+ // or with a call to `JniDecodeReferenceResult()`.
+ std::unique_ptr<JNIMacroLabel> decode_reference_slow_path;
+ std::unique_ptr<JNIMacroLabel> decode_reference_resume;
if (reference_return) {
DCHECK(!is_critical_native);
- // We abuse the JNI calling convention here, that is guaranteed to support passing
- // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`.
- main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
- ThreadOffset<kPointerSize> jni_decode_reference_result =
- QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult);
- // Pass result.
- SetNativeParameter(jni_asm.get(), main_jni_conv.get(), mr_conv->ReturnRegister());
- main_jni_conv->Next();
- if (main_jni_conv->IsCurrentParamInRegister()) {
- __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
- __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_decode_reference_result));
+ if (inline_decode_reference) {
+ // Decode local and JNI transition references in the main path.
+ decode_reference_slow_path = __ CreateLabel();
+ decode_reference_resume = __ CreateLabel();
+ __ DecodeJNITransitionOrLocalJObject(mr_conv->ReturnRegister(),
+ decode_reference_slow_path.get(),
+ decode_reference_resume.get());
+ __ Bind(decode_reference_resume.get());
} else {
- __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
- __ CallFromThread(jni_decode_reference_result);
+ CallDecodeReferenceResult<kPointerSize>(
+ jni_asm.get(), main_jni_conv.get(), mr_conv->ReturnRegister(), main_out_arg_size);
}
} // if (!is_critical_native)
@@ -532,7 +572,21 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ Bind(suspend_check_resume.get());
}
- // 7.5. Remove activation - need to restore callee save registers since the GC
+ // 7.5. Check if method exit hooks need to be called
+ // For critical native methods, we don't JIT stubs in debuggable runtimes.
+ // TODO(mythria): Add support to call method entry / exit hooks for critical native methods too.
+ std::unique_ptr<JNIMacroLabel> method_exit_hook_slow_path;
+ std::unique_ptr<JNIMacroLabel> method_exit_hook_return;
+ if (UNLIKELY(needs_entry_exit_hooks)) {
+ uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+ int offset = instrumentation::Instrumentation::RunExitHooksOffset().Int32Value();
+ method_exit_hook_slow_path = __ CreateLabel();
+ method_exit_hook_return = __ CreateLabel();
+ __ TestByteAndJumpIfNotZero(address + offset, method_exit_hook_slow_path.get());
+ __ Bind(method_exit_hook_return.get());
+ }
+
+ // 7.6. Remove activation - need to restore callee save registers since the GC
// may have changed them.
DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) {
@@ -547,7 +601,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
// 8.1. Read barrier slow path for the declaring class in the method for a static call.
// Skip this for @CriticalNative because we're not passing a `jclass` to the native method.
- if (kUseReadBarrier && is_static && !is_critical_native) {
+ if (gUseReadBarrier && is_static && !is_critical_native) {
__ Bind(jclass_read_barrier_slow_path.get());
// Construct slow path for read barrier:
@@ -594,7 +648,37 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ Jump(transition_to_runnable_resume.get());
}
- // 8.4. Suspend check slow path.
+ // 8.4. Exception poll slow path(s).
+ if (LIKELY(!is_critical_native)) {
+ __ Bind(exception_slow_path.get());
+ if (reference_return) {
+ // We performed the exception check early, so we need to adjust SP and pop IRT frame.
+ if (main_out_arg_size != 0) {
+ jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
+ __ DecreaseFrameSize(main_out_arg_size);
+ }
+ PopLocalReferenceFrame<kPointerSize>(
+ jni_asm.get(), jni_env_reg, saved_cookie_reg, callee_save_temp);
+ }
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
+ __ DeliverPendingException();
+ }
+
+ // 8.5 Slow path for decoding the `jobject`.
+ if (reference_return && inline_decode_reference) {
+ __ Bind(decode_reference_slow_path.get());
+ if (main_out_arg_size != 0) {
+ jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
+ }
+ CallDecodeReferenceResult<kPointerSize>(
+ jni_asm.get(), main_jni_conv.get(), mr_conv->ReturnRegister(), main_out_arg_size);
+ __ Jump(decode_reference_resume.get());
+ if (main_out_arg_size != 0) {
+ jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size);
+ }
+ }
+
+ // 8.6. Suspend check slow path.
if (UNLIKELY(is_fast_native)) {
__ Bind(suspend_check_slow_path.get());
if (reference_return && main_out_arg_size != 0) {
@@ -605,29 +689,34 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
if (reference_return) {
// Suspend check entry point overwrites top of managed stack and leaves it clobbered.
// We need to restore the top for subsequent runtime call to `JniDecodeReferenceResult()`.
- __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
+ __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp);
}
if (reference_return && main_out_arg_size != 0) {
__ IncreaseFrameSize(main_out_arg_size);
- jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size);
}
__ Jump(suspend_check_resume.get());
+ if (reference_return && main_out_arg_size != 0) {
+ jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size);
+ }
}
- // 8.5. Exception poll slow path(s).
- if (LIKELY(!is_critical_native)) {
- __ Bind(exception_slow_path.get());
- if (reference_return) {
- // We performed the exception check early, so we need to adjust SP and pop IRT frame.
- if (main_out_arg_size != 0) {
- jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
- __ DecreaseFrameSize(main_out_arg_size);
- }
- PopLocalReferenceFrame<kPointerSize>(
- jni_asm.get(), jni_env_reg, saved_cookie_reg, callee_save_temp);
- }
- DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
- __ DeliverPendingException();
+ // 8.7. Method entry / exit hooks slow paths.
+ if (UNLIKELY(needs_entry_exit_hooks)) {
+ __ Bind(method_entry_hook_slow_path.get());
+ // Use the JNI-specific method entry hook that saves all the arguments. Only the
+ // callee-save registers have been saved at this point, so go through the JNI-specific
+ // stub that saves the rest of the live registers.
+ __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEntryHook));
+ __ ExceptionPoll(exception_slow_path.get());
+ __ Jump(method_entry_hook_return.get());
+
+ __ Bind(method_exit_hook_slow_path.get());
+ // The method exit hook is called just before tearing down the frame, so there are no
+ // live registers left and we can call the method exit hook directly without a
+ // JNI-specific entrypoint.
+ __ Move(mr_conv->ArgumentRegisterForMethodExitHook(), managed_frame_size);
+ __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pMethodExitHook));
+ __ Jump(method_exit_hook_return.get());
}
// 9. Finalize code generation.
@@ -693,6 +782,31 @@ static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
}
}
+template <PointerSize kPointerSize>
+static void CallDecodeReferenceResult(JNIMacroAssembler<kPointerSize>* jni_asm,
+ JniCallingConvention* jni_conv,
+ ManagedRegister mr_return_reg,
+ size_t main_out_arg_size) {
+ // We abuse the JNI calling convention here, that is guaranteed to support passing
+ // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`.
+ jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+ ThreadOffset<kPointerSize> jni_decode_reference_result =
+ QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult);
+ // Pass result.
+ SetNativeParameter(jni_asm, jni_conv, mr_return_reg);
+ jni_conv->Next();
+ if (jni_conv->IsCurrentParamInRegister()) {
+ __ GetCurrentThread(jni_conv->CurrentParamRegister());
+ __ Call(jni_conv->CurrentParamRegister(), Offset(jni_decode_reference_result));
+ } else {
+ __ GetCurrentThread(jni_conv->CurrentParamStackOffset());
+ __ CallFromThread(jni_decode_reference_result);
+ }
+ // Note: If the native ABI returns the pointer in a register different from
+ // `mr_return_register`, the `JniDecodeReferenceResult` entrypoint must be
+ // a stub that moves the result to `mr_return_register`.
+}
+
JniCompiledMethod ArtQuickJniCompileMethod(const CompilerOptions& compiler_options,
uint32_t access_flags,
uint32_t method_idx,
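
The method entry/exit hook fast path added to the JNI stub above reduces to reading one
byte at an absolute address (the global Instrumentation object plus a listener-flag
offset) and branching to the slow path only when that byte is non-zero. The
self-contained C++ sketch below illustrates that check; `FakeInstrumentation`,
`NeedsSlowPath` and the field layout are hypothetical stand-ins, not the real ART types.

#include <cstddef>
#include <cstdint>

// Stand-in for art::instrumentation::Instrumentation; only the two byte-sized
// flags that the generated stub inspects are modeled here (illustrative layout).
struct FakeInstrumentation {
  uint8_t have_method_entry_listeners;
  uint8_t run_exit_hooks;
};

// Rough equivalent of `TestByteAndJumpIfNotZero(address + offset, slow_path)`:
// returns true when the stub should branch to the slow path.
inline bool NeedsSlowPath(uint64_t base_address, int32_t offset) {
  const uint8_t* flag = reinterpret_cast<const uint8_t*>(base_address + offset);
  return *flag != 0u;
}

int main() {
  FakeInstrumentation instr = {0u, 1u};  // no entry listeners, exit hooks requested
  uint64_t address = reinterpret_cast<uint64_t>(&instr);
  bool entry_slow =
      NeedsSlowPath(address, offsetof(FakeInstrumentation, have_method_entry_listeners));
  bool exit_slow = NeedsSlowPath(address, offsetof(FakeInstrumentation, run_exit_hooks));
  return (entry_slow ? 1 : 0) | (exit_slow ? 2 : 0);  // 2 for this example
}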
diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h
index 52a6f3cf02..d43b2a9917 100644
--- a/compiler/jni/quick/jni_compiler.h
+++ b/compiler/jni/quick/jni_compiler.h
@@ -21,8 +21,9 @@
#include "arch/instruction_set.h"
#include "base/array_ref.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class ArtMethod;
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 65be92cdce..598e8e72ff 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -22,7 +22,7 @@
#include "arch/x86/jni_frame_x86.h"
#include "utils/x86/managed_register_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
static constexpr ManagedRegister kManagedCoreArgumentRegisters[] = {
@@ -143,6 +143,10 @@ ManagedRegister X86ManagedRuntimeCallingConvention::MethodRegister() {
return X86ManagedRegister::FromCpuRegister(EAX);
}
+ManagedRegister X86ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return X86ManagedRegister::FromCpuRegister(EBX);
+}
+
void X86ManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
ManagedRuntimeCallingConvention::ResetIterator(displacement);
gpr_arg_count_ = 1u; // Skip EAX for ArtMethod*
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cd7ef5b557..f0d663dd98 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -18,9 +18,10 @@
#define ART_COMPILER_JNI_QUICK_X86_CALLING_CONVENTION_X86_H_
#include "base/enums.h"
+#include "base/macros.h"
#include "jni/quick/calling_convention.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
class X86ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention {
@@ -37,6 +38,7 @@ class X86ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingCon
void ResetIterator(FrameOffset displacement) override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
void Next() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 862ee5e2be..9d0761d2f7 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -23,7 +23,7 @@
#include "base/bit_utils.h"
#include "utils/x86_64/managed_register_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
static constexpr ManagedRegister kCoreArgumentRegisters[] = {
@@ -147,6 +147,10 @@ ManagedRegister X86_64ManagedRuntimeCallingConvention::MethodRegister() {
return X86_64ManagedRegister::FromCpuRegister(RDI);
}
+ManagedRegister X86_64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return X86_64ManagedRegister::FromCpuRegister(R8);
+}
+
bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
if (IsCurrentParamAFloatOrDouble()) {
return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 483f1f5806..859a277c60 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -18,9 +18,10 @@
#define ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
#include "base/enums.h"
+#include "base/macros.h"
#include "jni/quick/calling_convention.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
class X86_64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention {
@@ -35,6 +36,7 @@ class X86_64ManagedRuntimeCallingConvention final : public ManagedRuntimeCalling
ManagedRegister ReturnRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
ManagedRegister CurrentParamRegister() override;
diff --git a/compiler/libart-compiler.map b/compiler/libart-compiler.map
new file mode 100644
index 0000000000..f66052a329
--- /dev/null
+++ b/compiler/libart-compiler.map
@@ -0,0 +1,34 @@
+#
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ART_COMPILER {
+ global:
+ extern "C++" {
+ art::debug::MakeMiniDebugInfo*;
+ *art::debug::WriteDebugInfo*;
+ art::Compiler::Create*;
+ art::CompilerOptions::*;
+ art::CreateTrampoline*;
+ art::IntrinsicObjects::*;
+ art::linker::operator*art::linker::LinkerPatch::Type*;
+ art::operator*art::Whence*;
+ };
+
+ jit_load;
+
+ local:
+ *;
+};
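
For context, the new version script above works together with the `namespace art HIDDEN`
and `EXPORT` annotations used throughout this change: only the listed C++ patterns (plus
`jit_load`) stay global, everything else becomes local to libart-compiler.so. The snippet
below is a hedged sketch of how such visibility macros are commonly defined; the real
definitions live in ART's `base/macros.h` and may differ.

// Illustrative only, assuming GCC/Clang-style ELF visibility attributes.
#if defined(__GNUC__) || defined(__clang__)
#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))
#else
#define HIDDEN
#define EXPORT
#endif

namespace art HIDDEN {             // symbols in the namespace default to hidden visibility
EXPORT void SymbolKeptGlobal();    // hypothetical function re-exported despite the hidden default
void SymbolKeptLocal();            // hypothetical function that stays internal to the library
}  // namespace art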
diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h
index 7da1e82d91..8ed7fce0ff 100644
--- a/compiler/linker/linker_patch.h
+++ b/compiler/linker/linker_patch.h
@@ -23,9 +23,10 @@
#include <android-base/logging.h>
#include "base/bit_utils.h"
+#include "base/macros.h"
#include "dex/method_reference.h"
-namespace art {
+namespace art HIDDEN {
class DexFile;
@@ -328,7 +329,7 @@ class LinkerPatch {
friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs);
};
-std::ostream& operator<<(std::ostream& os, LinkerPatch::Type type);
+EXPORT std::ostream& operator<<(std::ostream& os, LinkerPatch::Type type);
inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) {
return lhs.literal_offset_ == rhs.literal_offset_ &&
diff --git a/compiler/linker/linker_patch_test.cc b/compiler/linker/linker_patch_test.cc
index 997418c4f7..1c46da15e1 100644
--- a/compiler/linker/linker_patch_test.cc
+++ b/compiler/linker/linker_patch_test.cc
@@ -16,9 +16,10 @@
#include <gtest/gtest.h>
+#include "base/macros.h"
#include "linker_patch.h"
-namespace art {
+namespace art HIDDEN {
namespace linker {
TEST(LinkerPatch, LinkerPatchOperators) {
diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc
index f1af4cb8b7..22b174fce6 100644
--- a/compiler/linker/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -16,17 +16,17 @@
#include <android-base/logging.h>
+#include "base/common_art_test.h"
#include "base/macros.h"
#include "base/unix_file/fd_file.h"
-#include "common_runtime_test.h"
#include "stream/buffered_output_stream.h"
#include "stream/file_output_stream.h"
#include "stream/vector_output_stream.h"
-namespace art {
+namespace art HIDDEN {
namespace linker {
-class OutputStreamTest : public CommonRuntimeTest {
+class OutputStreamTest : public CommonArtTest {
protected:
void CheckOffset(off_t expected) {
off_t actual = output_stream_->Seek(0, kSeekCurrent);
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index e1f061ae70..703584c537 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -22,7 +22,7 @@
#include "dex/dex_file_exception_helpers.h"
#include "quicken_info.h"
-namespace art {
+namespace art HIDDEN {
HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph,
const DexFile* const dex_file,
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 42a3f327e7..8668ef8221 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -17,13 +17,14 @@
#ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "dex/code_item_accessors.h"
#include "dex/dex_file.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class HBasicBlockBuilder : public ValueObject {
public:
diff --git a/compiler/optimizing/block_namer.cc b/compiler/optimizing/block_namer.cc
index d30448cd23..029e26b2be 100644
--- a/compiler/optimizing/block_namer.cc
+++ b/compiler/optimizing/block_namer.cc
@@ -18,7 +18,7 @@
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
std::ostream& BlockNamer::PrintName(std::ostream& os, HBasicBlock* blk) const {
os << "B";
diff --git a/compiler/optimizing/block_namer.h b/compiler/optimizing/block_namer.h
index ed396b9bf8..39c5973297 100644
--- a/compiler/optimizing/block_namer.h
+++ b/compiler/optimizing/block_namer.h
@@ -19,7 +19,9 @@
#include <ostream>
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HBasicBlock;
struct BlockNamer {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index dad3c818fa..919abfdc49 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -24,7 +24,7 @@
#include "nodes.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
class MonotonicValueRange;
@@ -490,7 +490,7 @@ class MonotonicValueRange : public ValueRange {
DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
};
-class BCEVisitor : public HGraphVisitor {
+class BCEVisitor final : public HGraphVisitor {
public:
// The least number of bounds checks that should be eliminated by triggering
// the deoptimization technique.
@@ -564,6 +564,19 @@ class BCEVisitor : public HGraphVisitor {
early_exit_loop_.clear();
taken_test_loop_.clear();
finite_loop_.clear();
+
+ // We may have eliminated all bounds checks so we should update the flag.
+ // TODO(solanes): Do this without a linear pass of the graph?
+ GetGraph()->SetHasBoundsChecks(false);
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsBoundsCheck()) {
+ GetGraph()->SetHasBoundsChecks(true);
+ return;
+ }
+ }
+ }
}
private:
@@ -1818,6 +1831,7 @@ class BCEVisitor : public HGraphVisitor {
HInstruction* condition,
bool is_null_check = false) {
HInstruction* suspend = loop->GetSuspendCheck();
+ DCHECK(suspend != nullptr);
block->InsertInstructionBefore(condition, block->GetLastInstruction());
DeoptimizationKind kind =
is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE;
@@ -1997,7 +2011,7 @@ class BCEVisitor : public HGraphVisitor {
phi->SetRawInputAt(0, instruction);
phi->SetRawInputAt(1, zero);
if (type == DataType::Type::kReference) {
- phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+ phi->SetReferenceTypeInfoIfValid(instruction->GetReferenceTypeInfo());
}
new_preheader->AddPhi(phi);
return phi;
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index ef08877daa..f210fa9127 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
class HInductionVarAnalysis;
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 5927d681b2..929a9e7fe7 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -17,6 +17,7 @@
#include "bounds_check_elimination.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "gvn.h"
#include "induction_var_analysis.h"
@@ -27,7 +28,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the BoundsCheckElimination tests.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e7826bbba3..48d1a9da2f 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -33,7 +33,7 @@
#include "ssa_builder.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
HGraphBuilder::HGraphBuilder(HGraph* graph,
const CodeItemDebugInfoAccessor& accessor,
@@ -103,7 +103,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() {
graph_->SetNumberOfVRegs(code_item_accessor_.RegistersSize());
graph_->SetNumberOfInVRegs(code_item_accessor_.InsSize());
graph_->SetMaximumNumberOfOutVRegs(code_item_accessor_.OutsSize());
- graph_->SetHasTryCatch(code_item_accessor_.TriesSize() != 0);
// Use ScopedArenaAllocator for all local allocations.
ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
@@ -168,7 +167,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) {
graph_->SetNumberOfVRegs(return_vregs + num_arg_vregs);
graph_->SetNumberOfInVRegs(num_arg_vregs);
graph_->SetMaximumNumberOfOutVRegs(num_arg_vregs);
- graph_->SetHasTryCatch(false);
// Use ScopedArenaAllocator for all local allocations.
ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 580769e0f9..ef225d9a6a 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -19,12 +19,13 @@
#include "base/arena_object.h"
#include "base/array_ref.h"
+#include "base/macros.h"
#include "dex/code_item_accessors.h"
#include "dex/dex_file-inl.h"
#include "dex/dex_file.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class ArtMethod;
class CodeGenerator;
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index c6232ef661..20a763cf6d 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -16,7 +16,7 @@
#include "cha_guard_optimization.h"
-namespace art {
+namespace art HIDDEN {
// Note we can only do CHA guard elimination/motion in a single pass, since
// if a guard is not removed, another guard might be removed due to
@@ -200,6 +200,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
block->RemoveInstruction(deopt);
HInstruction* suspend = loop_info->GetSuspendCheck();
+ DCHECK(suspend != nullptr);
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize(
diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h
index 440d51a969..5c1fdd90de 100644
--- a/compiler/optimizing/cha_guard_optimization.h
+++ b/compiler/optimizing/cha_guard_optimization.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimize CHA guards by removing/moving them.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 27eabafb8f..c9f42b52f5 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -15,6 +15,7 @@
*/
#include "code_generator.h"
+#include "base/globals.h"
#ifdef ART_ENABLE_CODEGEN_arm
#include "code_generator_arm_vixl.h"
@@ -24,6 +25,10 @@
#include "code_generator_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "code_generator_riscv64.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
#include "code_generator_x86.h"
#endif
@@ -39,7 +44,6 @@
#include "base/leb128.h"
#include "class_linker.h"
#include "class_root-inl.h"
-#include "compiled_method.h"
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "graph_visualizer.h"
@@ -61,7 +65,7 @@
#include "thread-current-inl.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
// Return whether a location is consistent with a type.
static bool CheckType(DataType::Type type, Location location) {
@@ -389,7 +393,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
core_spill_mask_,
fpu_spill_mask_,
GetGraph()->GetNumberOfVRegs(),
- GetGraph()->IsCompilingBaseline());
+ GetGraph()->IsCompilingBaseline(),
+ GetGraph()->IsDebuggable());
size_t frame_start = GetAssembler()->CodeSize();
GenerateFrameEntry();
@@ -412,7 +417,13 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (current->HasEnvironment()) {
- // Create stackmap for HNativeDebugInfo or any instruction which calls native code.
+ // Catch StackMaps are dealt with later on in `RecordCatchBlockInfo`.
+ if (block->IsCatchBlock() && block->GetFirstInstruction() == current) {
+ DCHECK(current->IsNop());
+ continue;
+ }
+
+ // Create stackmap for HNop or any instruction which calls native code.
// Note that we need correct mapping for the native PC of the call instruction,
// so the runtime's stackmap is not sufficient since it is at PC after the call.
MaybeRecordNativeDebugInfo(current, block->GetDexPc());
@@ -1030,6 +1041,9 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
}
#endif
default:
+ UNUSED(allocator);
+ UNUSED(graph);
+ UNUSED(stats);
return nullptr;
}
}
@@ -1041,7 +1055,8 @@ CodeGenerator::CodeGenerator(HGraph* graph,
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options,
- OptimizingCompilerStats* stats)
+ OptimizingCompilerStats* stats,
+ const art::ArrayRef<const bool>& unimplemented_intrinsics)
: frame_size_(0),
core_spill_mask_(0),
fpu_spill_mask_(0),
@@ -1066,7 +1081,8 @@ CodeGenerator::CodeGenerator(HGraph* graph,
is_leaf_(true),
needs_suspend_check_entry_(false),
requires_current_method_(false),
- code_generation_data_() {
+ code_generation_data_(),
+ unimplemented_intrinsics_(unimplemented_intrinsics) {
if (GetGraph()->IsCompilingOsr()) {
// Make OSR methods have all registers spilled, this simplifies the logic of
// jumping to the compiled code directly.
@@ -1123,7 +1139,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
for (HBasicBlock* block : graph.GetReversePostOrder()) {
if (block->IsLoopHeader()) {
HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
- if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+ if (suspend_check != nullptr && !suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
loop_headers.push_back(suspend_check);
}
}
@@ -1333,53 +1349,43 @@ void CodeGenerator::RecordCatchBlockInfo() {
continue;
}
- uint32_t dex_pc = block->GetDexPc();
- uint32_t num_vregs = graph_->GetNumberOfVRegs();
- uint32_t native_pc = GetAddressOf(block);
+ // Get the outer dex_pc. In debug builds we also save the full dex pc list for DCHECK purposes.
+ std::vector<uint32_t> dex_pc_list_for_verification;
+ if (kIsDebugBuild) {
+ dex_pc_list_for_verification.push_back(block->GetDexPc());
+ }
+ DCHECK(block->GetFirstInstruction()->IsNop());
+ DCHECK(block->GetFirstInstruction()->AsNop()->NeedsEnvironment());
+ HEnvironment* const environment = block->GetFirstInstruction()->GetEnvironment();
+ DCHECK(environment != nullptr);
+ HEnvironment* outer_environment = environment;
+ while (outer_environment->GetParent() != nullptr) {
+ outer_environment = outer_environment->GetParent();
+ if (kIsDebugBuild) {
+ dex_pc_list_for_verification.push_back(outer_environment->GetDexPc());
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // dex_pc_list_for_verification is set from innermost to outermost. Let's reverse it
+ // since we are expected to pass from outermost to innermost.
+ std::reverse(dex_pc_list_for_verification.begin(), dex_pc_list_for_verification.end());
+ DCHECK_EQ(dex_pc_list_for_verification.front(), outer_environment->GetDexPc());
+ }
- stack_map_stream->BeginStackMapEntry(dex_pc,
+ uint32_t native_pc = GetAddressOf(block);
+ stack_map_stream->BeginStackMapEntry(outer_environment->GetDexPc(),
native_pc,
/* register_mask= */ 0,
/* sp_mask= */ nullptr,
- StackMap::Kind::Catch);
-
- HInstruction* current_phi = block->GetFirstPhi();
- for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
- while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
- HInstruction* next_phi = current_phi->GetNext();
- DCHECK(next_phi == nullptr ||
- current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
- << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
- current_phi = next_phi;
- }
+ StackMap::Kind::Catch,
+ /* needs_vreg_info= */ true,
+ dex_pc_list_for_verification);
- if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
- } else {
- Location location = current_phi->GetLocations()->Out();
- switch (location.GetKind()) {
- case Location::kStackSlot: {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
- break;
- }
- case Location::kDoubleStackSlot: {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
- ++vreg;
- DCHECK_LT(vreg, num_vregs);
- break;
- }
- default: {
- // All catch phis must be allocated to a stack slot.
- LOG(FATAL) << "Unexpected kind " << location.GetKind();
- UNREACHABLE();
- }
- }
- }
- }
+ EmitEnvironment(environment,
+ /* slow_path= */ nullptr,
+ /* needs_vreg_info= */ true,
+ /* is_for_catch_handler= */ true);
stack_map_stream->EndStackMapEntry();
}
@@ -1390,7 +1396,9 @@ void CodeGenerator::AddSlowPath(SlowPathCode* slow_path) {
code_generation_data_->AddSlowPath(slow_path);
}
-void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) {
+void CodeGenerator::EmitVRegInfo(HEnvironment* environment,
+ SlowPathCode* slow_path,
+ bool is_for_catch_handler) {
StackMapStream* stack_map_stream = GetStackMapStream();
// Walk over the environment, and record the location of dex registers.
for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
@@ -1445,6 +1453,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kRegister : {
+ DCHECK(!is_for_catch_handler);
int id = location.reg();
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) {
uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id);
@@ -1466,6 +1475,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kFpuRegister : {
+ DCHECK(!is_for_catch_handler);
int id = location.reg();
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) {
uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id);
@@ -1487,6 +1497,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kFpuRegisterPair : {
+ DCHECK(!is_for_catch_handler);
int low = location.low();
int high = location.high();
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) {
@@ -1508,6 +1519,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kRegisterPair : {
+ DCHECK(!is_for_catch_handler);
int low = location.low();
int high = location.high();
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) {
@@ -1538,9 +1550,54 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
}
+void CodeGenerator::EmitVRegInfoOnlyCatchPhis(HEnvironment* environment) {
+ StackMapStream* stack_map_stream = GetStackMapStream();
+ DCHECK(environment->GetHolder()->GetBlock()->IsCatchBlock());
+ DCHECK_EQ(environment->GetHolder()->GetBlock()->GetFirstInstruction(), environment->GetHolder());
+ HInstruction* current_phi = environment->GetHolder()->GetBlock()->GetFirstPhi();
+ for (size_t vreg = 0; vreg < environment->Size(); ++vreg) {
+ while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
+ HInstruction* next_phi = current_phi->GetNext();
+ DCHECK(next_phi == nullptr ||
+ current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
+ << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
+ current_phi = next_phi;
+ }
+
+ if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
+ } else {
+ Location location = current_phi->GetLocations()->Out();
+ switch (location.GetKind()) {
+ case Location::kStackSlot: {
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+ location.GetStackIndex());
+ break;
+ }
+ case Location::kDoubleStackSlot: {
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+ location.GetStackIndex());
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+ location.GetHighStackIndex(kVRegSize));
+ ++vreg;
+ DCHECK_LT(vreg, environment->Size());
+ break;
+ }
+ default: {
+ LOG(FATAL) << "All catch phis must be allocated to a stack slot. Unexpected kind "
+ << location.GetKind();
+ UNREACHABLE();
+ }
+ }
+ }
+ }
+}
+
void CodeGenerator::EmitEnvironment(HEnvironment* environment,
SlowPathCode* slow_path,
- bool needs_vreg_info) {
+ bool needs_vreg_info,
+ bool is_for_catch_handler,
+ bool innermost_environment) {
if (environment == nullptr) return;
StackMapStream* stack_map_stream = GetStackMapStream();
@@ -1548,7 +1605,11 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment,
if (emit_inline_info) {
// We emit the parent environment first.
- EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info);
+ EmitEnvironment(environment->GetParent(),
+ slow_path,
+ needs_vreg_info,
+ is_for_catch_handler,
+ /* innermost_environment= */ false);
stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(),
environment->GetDexPc(),
needs_vreg_info ? environment->Size() : 0,
@@ -1556,9 +1617,13 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment,
this);
}
+ // If a dex register map is not required we just won't emit it.
if (needs_vreg_info) {
- // If a dex register map is not required we just won't emit it.
- EmitVRegInfo(environment, slow_path);
+ if (innermost_environment && is_for_catch_handler) {
+ EmitVRegInfoOnlyCatchPhis(environment);
+ } else {
+ EmitVRegInfo(environment, slow_path, is_for_catch_handler);
+ }
}
if (emit_inline_info) {
@@ -1671,7 +1736,7 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
// When (non-Baker) read barriers are enabled, some instructions
// use a slow path to emit a read barrier, which does not trigger
// GC.
- (kEmitCompilerReadBarrier &&
+ (gUseReadBarrier &&
!kUseBakerReadBarrier &&
(instruction->IsInstanceFieldGet() ||
instruction->IsPredicatedInstanceFieldGet() ||
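
A quick standalone illustration of the ordering used by the new catch-block stack map
code above: dex PCs are collected while walking from the innermost environment out
through its parents, then reversed so the verification list runs from outermost to
innermost, which is what the DCHECK on the front element expects. `FakeEnvironment` is a
hypothetical stand-in for the real `HEnvironment`.

#include <algorithm>
#include <cstdint>
#include <vector>

struct FakeEnvironment {
  uint32_t dex_pc;
  FakeEnvironment* parent;  // nullptr for the outermost (non-inlined) environment
};

// Mirrors the debug-build bookkeeping: push innermost-to-outermost, then reverse.
std::vector<uint32_t> CollectDexPcsOuterToInner(const FakeEnvironment* innermost) {
  std::vector<uint32_t> pcs;
  for (const FakeEnvironment* env = innermost; env != nullptr; env = env->parent) {
    pcs.push_back(env->dex_pc);
  }
  std::reverse(pcs.begin(), pcs.end());  // outermost first
  return pcs;
}

int main() {
  FakeEnvironment outer{10u, nullptr};
  FakeEnvironment inner{42u, &outer};
  std::vector<uint32_t> pcs = CollectDexPcsOuterToInner(&inner);
  return pcs.front() == 10u ? 0 : 1;  // front element is the outermost dex pc
}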
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index d81a7b5382..9872efaa4a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -26,6 +26,7 @@
#include "base/bit_utils.h"
#include "base/enums.h"
#include "base/globals.h"
+#include "base/macros.h"
#include "base/memory_region.h"
#include "class_root.h"
#include "dex/string_reference.h"
@@ -33,13 +34,15 @@
#include "graph_visualizer.h"
#include "locations.h"
#include "nodes.h"
+#include "oat_quick_method_header.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
+#include "subtype_check.h"
#include "utils/assembler.h"
#include "utils/label.h"
-namespace art {
+namespace art HIDDEN {
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
@@ -56,8 +59,18 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
-static constexpr ReadBarrierOption kCompilerReadBarrierOption =
- kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+static const ReadBarrierOption gCompilerReadBarrierOption =
+ gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+
+constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+constexpr size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+constexpr uint32_t shifted_visibly_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initializing_value =
+ enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
class Assembler;
class CodeGenerator;
@@ -291,6 +304,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// Returns whether we should split long moves in parallel moves.
virtual bool ShouldSplitLongMoves() const { return false; }
+ // Returns true if `invoke` is an implemented intrinsic in this codegen's arch.
+ bool IsImplementedIntrinsic(HInvoke* invoke) const {
+ return invoke->IsIntrinsic() &&
+ !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())];
+ }
+
size_t GetNumberOfCoreCalleeSaveRegisters() const {
return POPCOUNT(core_callee_save_mask_);
}
@@ -460,7 +479,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// If the target class is in the boot image, it's non-moveable and it doesn't matter
// if we compare it with a from-space or to-space reference, the result is the same.
// It's OK to traverse a class hierarchy jumping between from-space and to-space.
- return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
+ return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
}
static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
@@ -475,7 +494,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case TypeCheckKind::kArrayObjectCheck:
case TypeCheckKind::kInterfaceCheck: {
bool needs_read_barrier =
- kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
+ gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
// We do not emit read barriers for HCheckCast, so we can get false negatives
// and the slow path shall re-check and simply return if the cast is actually OK.
return !needs_read_barrier;
@@ -678,7 +697,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return LocationSummary::kCallOnMainOnly;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(!load->NeedsEnvironment());
- return kEmitCompilerReadBarrier
+ return gUseReadBarrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
break;
@@ -736,7 +755,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options,
- OptimizingCompilerStats* stats);
+ OptimizingCompilerStats* stats,
+ const art::ArrayRef<const bool>& unimplemented_intrinsics);
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
@@ -836,8 +856,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
void BlockIfInRegister(Location location, bool is_out = false) const;
void EmitEnvironment(HEnvironment* environment,
SlowPathCode* slow_path,
- bool needs_vreg_info = true);
- void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);
+ bool needs_vreg_info = true,
+ bool is_for_catch_handler = false,
+ bool innermost_environment = true);
+ void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
+ void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);
static void PrepareCriticalNativeArgumentMoves(
HInvokeStaticOrDirect* invoke,
@@ -877,6 +900,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
std::unique_ptr<CodeGenerationData> code_generation_data_;
+ // Which intrinsics we don't have handcrafted code for.
+ art::ArrayRef<const bool> unimplemented_intrinsics_;
+
friend class OptimizingCFITest;
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);
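
The `status_byte_offset` and `shifted_*_value` constants introduced in code_generator.h
above let the generated clinit entry check test the class status with a single byte load
followed by unsigned compares. The sketch below shows the idea; the bit position, byte
index and enum value are assumptions for illustration, not the real `SubtypeCheckBits` or
`ClassStatus` definitions.

#include <cstddef>
#include <cstdint>

// Assumed layout: a 4-bit class status in the top nibble of the last byte of a 32-bit
// status word shared with subtype-check bits (little-endian, as on ART targets).
constexpr size_t kStatusLsbPosition = 28;                        // assumed, not the real value
constexpr size_t kBitsPerByte = 8;
constexpr size_t kStatusByteOffset = kStatusLsbPosition / kBitsPerByte;  // byte 3
constexpr uint32_t kVisiblyInitialized = 0xD;                    // assumed ClassStatus value

constexpr uint32_t kShiftedVisiblyInitialized =
    kVisiblyInitialized << (kStatusLsbPosition % kBitsPerByte);  // value as seen in that byte

// Rough equivalent of `Ldrb temp, [class, #status_byte_offset]; Cmp temp, shifted; B.hs`:
// one unsigned byte compare suffices because visibly-initialized is the highest status.
inline bool IsVisiblyInitialized(const uint8_t* status_word_bytes) {
  return status_word_bytes[kStatusByteOffset] >= kShiftedVisiblyInitialized;
}

int main() {
  uint8_t status_word[4] = {0u, 0u, 0u, static_cast<uint8_t>(kShiftedVisiblyInitialized | 0x3u)};
  return IsVisiblyInitialized(status_word) ? 0 : 1;
}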
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2a0b481b2d..41db9a2542 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -27,7 +27,6 @@
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -44,6 +43,7 @@
#include "mirror/var_handle.h"
#include "offsets.h"
#include "optimizing/common_arm64.h"
+#include "optimizing/nodes.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
@@ -58,7 +58,7 @@ using vixl::EmissionCheckScope;
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif
-namespace art {
+namespace art HIDDEN {
template<class MirrorType>
class GcRoot;
@@ -77,7 +77,6 @@ using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
-using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
@@ -583,7 +582,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -762,7 +761,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
: SlowPathCodeARM64(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -825,6 +824,9 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
+ }
arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
@@ -933,6 +935,33 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const
return Location::RegisterLocation(x15.GetCode());
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -943,7 +972,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
callee_saved_core_registers.GetList(),
callee_saved_fp_registers.GetList(),
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
location_builder_neon_(graph, this),
@@ -1169,9 +1199,21 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
+ if (instruction->IsMethodExitHook()) {
+ // Check whether we need to find out if the caller requires a deoptimization. Strictly speaking,
+ // checking the CheckCallerForDeopt bit would suffice, but it is faster to check for any non-zero
+ // value. The kCHA bit is not used in debuggable runtimes because CHA optimization is disabled
+ // there, and the other bit is set when this method itself requires a deoptimization due to
+ // redefinition, so it is safe to just check for a non-zero value here.
+ __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+ __ Cbnz(value, slow_path->GetEntryLabel());
+ }
+
uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
- __ Mov(temp, address + offset);
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
+ __ Mov(temp, address + offset.Int32Value());
__ Ldrb(value, MemOperand(temp, 0));
__ Cbnz(value, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
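The two checks emitted above (the per-frame deopt flag and the instrumentation listener byte) reduce to a simple predicate. A rough C++ model follows; the flag bit names are paraphrased from the comment, not ART's exact declarations.

#include <cstdint>

// Bits assumed to live in the per-frame ShouldDeoptimize flag (names paraphrased).
enum ShouldDeoptimizeBits : uint32_t {
  kCheckCallerForDeopt = 1u << 0,         // The caller needs a deoptimization check.
  kDeoptForMethodRedefinition = 1u << 1,  // This method itself was redefined.
};

// Either a non-zero deopt flag or an installed exit listener diverts to the slow path;
// testing the whole flag for non-zero is cheaper than isolating kCheckCallerForDeopt.
inline bool MethodExitNeedsSlowPath(uint32_t should_deoptimize_flag,
                                    uint8_t have_method_exit_listeners) {
  return should_deoptimize_flag != 0u || have_method_exit_listeners != 0u;
}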
@@ -1233,6 +1275,54 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
void CodeGeneratorARM64::GenerateFrameEntry() {
MacroAssembler* masm = GetVIXLAssembler();
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ UseScratchRegisterScope temps(masm);
+ vixl::aarch64::Label resolution;
+ vixl::aarch64::Label memory_barrier;
+
+ Register temp1 = temps.AcquireW();
+ Register temp2 = temps.AcquireW();
+
+ // Check if we're visibly initialized.
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset));
+ __ Cmp(temp2, shifted_visibly_initialized_value);
+ __ B(hs, &frame_entry_label_);
+
+ // Check if we're initialized and jump to code that does a memory barrier if
+ // so.
+ __ Cmp(temp2, shifted_initialized_value);
+ __ B(hs, &memory_barrier);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ Cmp(temp2, shifted_initializing_value);
+ __ B(lo, &resolution);
+
+ __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
+ __ Cmp(temp1, temp2);
+ __ B(eq, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset64 entrypoint_offset =
+ GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
+ __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
+ __ Br(temp1.X());
+
+ __ Bind(&memory_barrier);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
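The branch ladder above implements a three-way decision on the class status byte. The sketch below restates it in plain C++; the enum values are illustrative stand-ins that only preserve the ordering the comparisons rely on.

#include <cstdint>

enum class ClassStatusSketch : uint8_t {  // Illustrative values; only the ordering matters.
  kInitializing = 1,
  kInitialized = 2,
  kVisiblyInitialized = 3,
};

enum class ClinitAction {
  kRunMethod,             // Fall through to the regular frame entry.
  kMemoryBarrierThenRun,  // Initialized but not yet visibly: emit a kAnyAny barrier first.
  kGoToResolutionStub,    // Not initialized, or being initialized by another thread.
};

inline ClinitAction DecideClinitAction(ClassStatusSketch status,
                                       uint32_t clinit_thread_id,
                                       uint32_t self_thread_id) {
  if (status >= ClassStatusSketch::kVisiblyInitialized) return ClinitAction::kRunMethod;
  if (status >= ClassStatusSketch::kInitialized) return ClinitAction::kMemoryBarrierThenRun;
  if (status >= ClassStatusSketch::kInitializing && clinit_thread_id == self_thread_id) {
    return ClinitAction::kRunMethod;
  }
  return ClinitAction::kGoToResolutionStub;
}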
__ Bind(&frame_entry_label_);
bool do_overflow_check =
@@ -1364,12 +1454,12 @@ void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* l
}
}
-void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
+void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool emit_null_check) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register card = temps.AcquireX();
Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
vixl::aarch64::Label done;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Cbz(value, &done);
}
// Load the address of the card table into `card`.
@@ -1391,7 +1481,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
// of the card to mark; and 2. to load the `kCardDirty` value) saves a load
// (no need to explicitly load `kCardDirty` as an immediate value).
__ Strb(card, MemOperand(card, temp.X()));
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&done);
}
}
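MarkGCCard above is the usual card-table write barrier. A plain C++ model of what it stores follows; the card size is an assumption of this sketch, not taken from the patch.

#include <cstddef>
#include <cstdint>

constexpr size_t kCardShiftSketch = 10;  // Assume 1 KiB cards for illustration.

inline void MarkCardSketch(uint8_t* card_table_biased_begin, const void* holder) {
  uint8_t* card = card_table_biased_begin +
                  (reinterpret_cast<uintptr_t>(holder) >> kCardShiftSketch);
  // The emitted code stores the card-table base register itself as the byte value, so the
  // dirty marker needs no separate immediate load (see the comment above); here any
  // non-zero byte stands in for kCardDirty.
  *card = 0x70;  // Illustrative dirty value.
}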
@@ -1904,11 +1994,6 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- const size_t status_byte_offset =
- mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
// CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
// the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
@@ -1974,6 +2059,13 @@ bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const {
void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
+ if (instruction->IsNoOp()) {
+ if (successor != nullptr) {
+ __ B(codegen_->GetLabelOf(successor));
+ }
+ return;
+ }
+
if (codegen_->CanUseImplicitSuspendCheck()) {
__ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2051,7 +2143,7 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_field_get_with_read_barrier
@@ -2107,7 +2199,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
MemOperand field =
HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
+ if (gUseReadBarrier && kUseBakerReadBarrier &&
load_type == DataType::Type::kReference) {
// Object FieldGet with Baker's read barrier case.
// /* HeapReference<Object> */ out = *(base + offset)
@@ -2154,9 +2246,10 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
+ HInstruction* value = instruction->InputAt(1);
+ if (IsZeroBitPattern(value)) {
+ locations->SetInAt(1, Location::ConstantLocation(value));
+ } else if (DataType::IsFloatingPointType(value->GetType())) {
locations->SetInAt(1, Location::RequiresFpuRegister());
} else {
locations->SetInAt(1, Location::RequiresRegister());
@@ -2165,7 +2258,8 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
bool is_predicated =
instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
@@ -2205,8 +2299,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
}
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ codegen_->MarkGCCard(
+ obj,
+ Register(value),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
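The new write_barrier_kind parameter threads a per-store decision down to MarkGCCard. A small sketch of the two predicates the patch encodes; the enum constant names come from the patch, while the helpers and the kDontEmit rationale are this note's reading of it.

enum class WriteBarrierKindSketch {
  kEmitWithNullCheck,  // Emit the card mark, but skip it when the stored value is null.
  kEmitNoNullCheck,    // Emit the card mark unconditionally (value known non-null or already checked).
  kDontEmit,           // Presumably elided because another barrier already covers this store.
};

inline bool ShouldEmitWriteBarrier(bool store_needs_write_barrier, WriteBarrierKindSketch kind) {
  return store_needs_write_barrier && kind != WriteBarrierKindSketch::kDontEmit;
}

inline bool ShouldEmitValueNullCheck(bool value_can_be_null, WriteBarrierKindSketch kind) {
  return value_can_be_null && kind == WriteBarrierKindSketch::kEmitWithNullCheck;
}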
if (is_predicated) {
@@ -2382,7 +2480,7 @@ void LocationsBuilderARM64::VisitDataProcWithShifterOp(
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
if (instruction->GetInstrKind() == HInstruction::kNeg) {
- locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)));
} else {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -2475,7 +2573,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn
// data offset constant generation out of the loop and reduce the critical path length in the
// loop.
locations->SetInAt(1, shift->GetValue() == 0
- ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+ ? Location::ConstantLocation(instruction->GetOffset())
: Location::RequiresRegister());
locations->SetInAt(2, Location::ConstantLocation(shift));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2549,7 +2647,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate*
void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -2605,10 +2703,10 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
// does not support the HIntermediateAddress instruction.
DCHECK(!((type == DataType::Type::kReference) &&
instruction->GetArray()->IsIntermediateAddress() &&
- kEmitCompilerReadBarrier &&
+ gUseReadBarrier &&
!kUseBakerReadBarrier));
- if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
@@ -2750,9 +2848,10 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
instruction,
needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex()));
+ HInstruction* value = instruction->GetValue();
+ if (IsZeroBitPattern(value)) {
+ locations->SetInAt(2, Location::ConstantLocation(value));
} else if (DataType::IsFloatingPointType(value_type)) {
locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
@@ -2871,7 +2970,11 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
}
}
- codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ codegen_->MarkGCCard(array, value.W(), /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsLinked());
@@ -2929,10 +3032,10 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
HInstruction* length = instruction->InputAt(1);
bool both_const = index->IsConstant() && length->IsConstant();
locations->SetInAt(0, both_const
- ? Location::ConstantLocation(index->AsConstant())
+ ? Location::ConstantLocation(index)
: ARM64EncodableConstantOrRegister(index, instruction));
locations->SetInAt(1, both_const
- ? Location::ConstantLocation(length->AsConstant())
+ ? Location::ConstantLocation(length)
: ARM64EncodableConstantOrRegister(length, instruction));
}
@@ -3030,6 +3133,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
DataType::Type in_type = compare->InputAt(0)->GetType();
+ HInstruction* rhs = compare->InputAt(1);
switch (in_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -3039,7 +3143,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
+ locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -3047,8 +3151,8 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
case DataType::Type::kFloat64: {
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1,
- IsFloatingPointZeroConstant(compare->InputAt(1))
- ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
+ IsFloatingPointZeroConstant(rhs)
+ ? Location::ConstantLocation(rhs)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
break;
@@ -3096,16 +3200,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ HInstruction* rhs = instruction->InputAt(1);
if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1,
- IsFloatingPointZeroConstant(instruction->InputAt(1))
- ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
+ IsFloatingPointZeroConstant(rhs)
+ ? Location::ConstantLocation(rhs)
: Location::RequiresFpuRegister());
} else {
// Integer cases.
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
+ locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction));
}
if (!instruction->IsEmittedAtUseSite()) {
@@ -3845,12 +3950,12 @@ void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderARM64::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorARM64::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
@@ -3893,12 +3998,15 @@ void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -3948,9 +4056,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -4194,9 +4302,9 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -5313,7 +5421,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -5327,7 +5435,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -5354,7 +5462,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5523,7 +5631,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -5577,7 +5685,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
temp,
/* offset placeholder */ 0u,
ldr_label,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
@@ -5601,7 +5709,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
out.X(),
/* offset= */ 0,
/* fixup_label= */ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
return;
}
default:
@@ -6156,7 +6264,10 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
}
void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
@@ -6462,7 +6573,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
DataType::Type type = DataType::Type::kReference;
Register out_reg = RegisterFrom(out, type);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -6503,7 +6614,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
Register out_reg = RegisterFrom(out, type);
Register obj_reg = RegisterFrom(obj, type);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6538,7 +6649,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
DCHECK(fixup_label == nullptr || offset == 0u);
Register root_reg = RegisterFrom(root, DataType::Type::kReference);
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barriers are used.
@@ -6604,7 +6715,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
vixl::aarch64::Register marked_old_value,
vixl::aarch64::Register old_value) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
@@ -6626,7 +6737,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
const vixl::aarch64::MemOperand& src,
bool needs_null_check,
bool use_load_acquire) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
@@ -6722,7 +6833,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -6800,7 +6911,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru
void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+ if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
// The following condition is a run-time one; it is executed after the
// previous compile-time test, to avoid penalizing non-debug builds.
if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
@@ -6829,7 +6940,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -6854,7 +6965,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -6869,7 +6980,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
@@ -7003,6 +7114,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
vixl::aarch64::MemOperand& lock_word,
vixl::aarch64::Label* slow_path,
vixl::aarch64::Label* throw_npe = nullptr) {
+ vixl::aarch64::Label throw_npe_cont;
// Load the lock word containing the rb_state.
__ Ldr(ip0.W(), lock_word);
// Given the numeric representation, it's enough to check the low bit of the rb_state.
@@ -7014,7 +7126,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
"Field and array LDR offsets must be the same to reuse the same code.");
// To throw NPE, we return to the fast path; the artificial dependence below does not matter.
if (throw_npe != nullptr) {
- __ Bind(throw_npe);
+ __ Bind(&throw_npe_cont);
}
// Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
@@ -7026,6 +7138,12 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
// a memory barrier (which would be more expensive).
__ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
__ Br(lr); // And return back to the function.
+ if (throw_npe != nullptr) {
+ // Clear IP0 before returning to the fast path.
+ __ Bind(throw_npe);
+ __ Mov(ip0.X(), xzr);
+ __ B(&throw_npe_cont);
+ }
// Note: The fake dependency is unnecessary for the slow path.
}
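The Add with Operand(ip0, LSR, 32) above is the standard fake-address-dependency trick. A C++ rendering of the arithmetic, for illustration only:

#include <cstdint>

// The lock word is 32 bits wide, so shifting the 64-bit register holding it right by 32
// always yields zero; adding that zero leaves the base address unchanged but makes it
// data-dependent on the lock-word load, which orders the subsequent reference load
// without a dmb.
inline uint64_t AddFakeDependency(uint64_t base_address, uint64_t lock_word_reg) {
  return base_address + (lock_word_reg >> 32);
}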
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f4d652c29c..6190364d1d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#include "base/bit_field.h"
+#include "base/macros.h"
#include "class_root.h"
#include "code_generator.h"
#include "common_arm64.h"
@@ -36,7 +37,7 @@
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace linker {
class Arm64RelativePatcherTest;
@@ -92,7 +93,10 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers =
vixl::aarch64::CPURegList(
tr,
// Reserve X20 as Marking Register when emitting Baker read barriers.
- ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
+    // TODO: The marking register does not need to be reserved for the userfaultfd GC,
+    // but dropping the reservation requires assembler changes because the GC is
+    // chosen at load time, not at compile time.
+ (kReserveMarkingRegister ? mr : vixl::aarch64::NoCPUReg),
kImplicitSuspendCheckRegister,
vixl::aarch64::lr);
@@ -111,9 +115,7 @@ inline Location FixedTempLocation() {
const vixl::aarch64::CPURegList callee_saved_core_registers(
vixl::aarch64::CPURegister::kRegister,
vixl::aarch64::kXRegSize,
- ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
- ? vixl::aarch64::x21.GetCode()
- : vixl::aarch64::x20.GetCode()),
+ (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()),
vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister,
vixl::aarch64::kDRegSize,
@@ -121,6 +123,41 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi
vixl::aarch64::d15.GetCode());
Location ARM64ReturnLocation(DataType::Type return_type);
+#define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ V(SystemArrayCopyByte) \
+ V(SystemArrayCopyInt) \
+ /* 1.8 */ \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject)
+
class SlowPathCodeARM64 : public SlowPathCode {
public:
explicit SlowPathCodeARM64(HInstruction* instruction)
@@ -327,7 +364,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleCondition(HCondition* instruction);
@@ -615,7 +653,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Emit a write barrier.
void MarkGCCard(vixl::aarch64::Register object,
vixl::aarch64::Register value,
- bool value_can_be_null);
+ bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 09fa598203..d69e77045b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -26,7 +26,6 @@
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
@@ -46,7 +45,7 @@
#include "utils/assembler.h"
#include "utils/stack_checks.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
namespace vixl32 = vixl::aarch32;
@@ -744,7 +743,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -922,7 +921,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
: SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -974,6 +973,10 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+      // Load the frame size to pass to the method exit hook.
+ __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
+ }
arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
@@ -1845,7 +1848,7 @@ static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -1904,6 +1907,33 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
return final_label;
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -1914,7 +1944,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
kCoreCalleeSaves.GetList(),
ComputeSRegisterListMask(kFpuCalleeSaves),
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
location_builder_(graph, this),
@@ -2101,7 +2132,10 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
blocked_core_registers_[LR] = true;
blocked_core_registers_[PC] = true;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+  // TODO: The marking register does not need to be reserved for the userfaultfd GC,
+  // but dropping the reservation requires assembler changes because the GC is
+  // chosen at load time, not at compile time.
+ if (kReserveMarkingRegister) {
// Reserve marking register.
blocked_core_registers_[MR] = true;
}
@@ -2164,9 +2198,24 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction*
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
codegen_->AddSlowPath(slow_path);
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+ if (instruction->IsMethodExitHook()) {
+    // Check whether the caller needs a deoptimization. Strictly speaking, testing the
+    // CheckCallerForDeopt bit would suffice, but testing the whole flag for non-zero is faster.
+    // The kCHA bit is unused in debuggable runtimes because CHA optimization is disabled there,
+    // and the remaining bit is set when this method itself needs deoptimization due to
+    // redefinition, so checking for any non-zero value is safe here.
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ temp,
+ sp,
+ codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+ __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ }
+
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
- __ Mov(temp, address + offset);
+ __ Mov(temp, address + offset.Int32Value());
__ Ldrb(temp, MemOperand(temp, 0));
__ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -2234,6 +2283,61 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Label resolution;
+ vixl32::Label memory_barrier;
+
+ // Check if we're visibly initialized.
+
+ vixl32::Register temp1 = temps.Acquire();
+ // Use r4 as other temporary register.
+ DCHECK(!blocked_core_registers_[R4]);
+ DCHECK(!kCoreCalleeSaves.Includes(r4));
+ vixl32::Register temp2 = r4;
+ for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+ DCHECK(!reg.Is(r4));
+ }
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ Ldrb(temp2, MemOperand(temp1, status_byte_offset));
+ __ Cmp(temp2, shifted_visibly_initialized_value);
+ __ B(cs, &frame_entry_label_);
+
+ // Check if we're initialized and jump to code that does a memory barrier if
+ // so.
+ __ Cmp(temp2, shifted_initialized_value);
+ __ B(cs, &memory_barrier);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ Cmp(temp2, shifted_initializing_value);
+ __ B(lo, &resolution);
+
+ __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
+ __ Cmp(temp1, temp2);
+ __ B(eq, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset32 entrypoint_offset =
+ GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
+ __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
+ __ Bx(temp1);
+
+ __ Bind(&memory_barrier);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+
__ Bind(&frame_entry_label_);
if (HasEmptyFrame()) {
@@ -3069,12 +3173,12 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
@@ -4514,10 +4618,11 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
switch (div->GetResultType()) {
case DataType::Type::kInt32: {
- if (div->InputAt(1)->IsConstant()) {
+ HInstruction* divisor = div->InputAt(1);
+ if (divisor->IsConstant()) {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
- int32_t value = Int32ConstantFrom(div->InputAt(1));
+ locations->SetInAt(1, Location::ConstantLocation(divisor));
+ int32_t value = Int32ConstantFrom(divisor);
Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
if (value == 1 || value == 0 || value == -1) {
// No temp register required.
@@ -4631,10 +4736,11 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
switch (type) {
case DataType::Type::kInt32: {
- if (rem->InputAt(1)->IsConstant()) {
+ HInstruction* divisor = rem->InputAt(1);
+ if (divisor->IsConstant()) {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
- int32_t value = Int32ConstantFrom(rem->InputAt(1));
+ locations->SetInAt(1, Location::ConstantLocation(divisor));
+ int32_t value = Int32ConstantFrom(divisor);
Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
if (value == 1 || value == 0 || value == -1) {
// No temp register required.
@@ -5187,17 +5293,18 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
+ HInstruction* shift = ror->InputAt(1);
switch (ror->GetResultType()) {
case DataType::Type::kInt32: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+ locations->SetInAt(1, Location::RegisterOrConstant(shift));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
case DataType::Type::kInt64: {
locations->SetInAt(0, Location::RequiresRegister());
- if (ror->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+ if (shift->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(shift));
} else {
locations->SetInAt(1, Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
@@ -5234,11 +5341,12 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
+ HInstruction* shift = op->InputAt(1);
switch (op->GetResultType()) {
case DataType::Type::kInt32: {
locations->SetInAt(0, Location::RequiresRegister());
- if (op->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ if (shift->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(shift));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
} else {
locations->SetInAt(1, Location::RequiresRegister());
@@ -5250,8 +5358,8 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
}
case DataType::Type::kInt64: {
locations->SetInAt(0, Location::RequiresRegister());
- if (op->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ if (shift->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(shift));
// For simplicity, use kOutputOverlap even though we only require that low registers
// don't clash with high registers which the register allocator currently guarantees.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5727,8 +5835,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register a
__ CompareAndBranchIfNonZero(temp1, &fail);
}
-void LocationsBuilderARMVIXL::HandleFieldSet(
- HInstruction* instruction, const FieldInfo& field_info) {
+void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations =
@@ -5751,8 +5860,12 @@ void LocationsBuilderARMVIXL::HandleFieldSet(
// Temporary registers for the write barrier.
// TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
if (needs_write_barrier) {
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
- locations->AddTemp(Location::RequiresRegister());
+ if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else if (kPoisonHeapReferences) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
} else if (generate_volatile) {
// ARM encoding have some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
@@ -5773,7 +5886,8 @@ void LocationsBuilderARMVIXL::HandleFieldSet(
void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations = instruction->GetLocations();
@@ -5889,10 +6003,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
UNREACHABLE();
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
vixl32::Register card = RegisterFrom(locations->GetTemp(1));
- codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
+ codegen_->MarkGCCard(
+ temp,
+ card,
+ base,
+ RegisterFrom(value),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_volatile) {
@@ -5911,7 +6031,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
instruction->IsPredicatedInstanceFieldGet());
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
+ gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
@@ -5975,7 +6095,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu
DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
(input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
- return Location::ConstantLocation(input->AsConstant());
+ return Location::ConstantLocation(input);
} else {
return Location::RequiresFpuRegister();
}
@@ -5986,7 +6106,7 @@ Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* c
DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
if (constant->IsConstant() &&
CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
}
@@ -6082,7 +6202,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kReference: {
// /* HeapReference<Object> */ out = *(base + offset)
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
@@ -6165,11 +6285,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
}
void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -6202,11 +6325,14 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instr
}
void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
@@ -6386,7 +6512,7 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -6534,14 +6660,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
// The read barrier instrumentation of object ArrayGet
// instructions does not support the HIntermediateAddress
// instruction.
- DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+ DCHECK(!(has_intermediate_address && gUseReadBarrier));
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
@@ -6688,8 +6814,10 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
locations->SetInAt(2, Location::RequiresRegister());
}
if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
+    // Temporary registers for the write barrier or heap reference poisoning.
+ // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
+ // InstructionCodeGeneratorARMVIXL::VisitArraySet.
+ locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -6841,7 +6969,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
}
}
- codegen_->MarkGCCard(temp1, temp2, array, value, /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ codegen_->MarkGCCard(temp1, temp2, array, value, /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsReferenced());
@@ -7025,10 +7157,10 @@ void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
// locations.
bool both_const = index->IsConstant() && length->IsConstant();
locations->SetInAt(0, both_const
- ? Location::ConstantLocation(index->AsConstant())
+ ? Location::ConstantLocation(index)
: ArmEncodableConstantOrRegister(index, CMP));
locations->SetInAt(1, both_const
- ? Location::ConstantLocation(length->AsConstant())
+ ? Location::ConstantLocation(length)
: ArmEncodableConstantOrRegister(length, CMP));
}
@@ -7072,9 +7204,9 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
vixl32::Register card,
vixl32::Register object,
vixl32::Register value,
- bool value_can_be_null) {
+ bool emit_null_check) {
vixl32::Label is_null;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
}
// Load the address of the card table into `card`.
@@ -7097,7 +7229,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
// of the card to mark; and 2. to load the `kCardDirty` value) saves a load
// (no need to explicitly load `kCardDirty` as an immediate value).
__ Strb(card, MemOperand(card, temp));
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&is_null);
}
}
@@ -7459,7 +7591,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -7473,7 +7605,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7501,7 +7633,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -7622,12 +7754,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
-
- const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
- GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
+ __ Ldrb(temp, MemOperand(class_reg, status_byte_offset));
__ Cmp(temp, shifted_visibly_initialized_value);
__ B(lo, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -7721,7 +7848,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need, including temps.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7760,7 +7887,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
codegen_->EmitMovwMovtPlaceholder(labels, out);
// All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
codegen_->AddSlowPath(slow_path);
@@ -7781,7 +7908,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
load->GetString()));
// /* GcRoot<mirror::String> */ out = *out
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
return;
}
default:
@@ -7838,7 +7965,7 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -7888,9 +8015,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -8185,9 +8312,9 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -8773,7 +8900,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
vixl32::Register out_reg = RegisterFrom(out);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
@@ -8808,7 +8935,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
vixl32::Register out_reg = RegisterFrom(out);
vixl32::Register obj_reg = RegisterFrom(obj);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
@@ -8837,7 +8964,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
vixl32::Register root_reg = RegisterFrom(root);
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barriers are used.
@@ -8901,7 +9028,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier(
vixl::aarch32::Register marked_old_value,
vixl::aarch32::Register old_value) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
@@ -8935,7 +9062,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
vixl32::Register obj,
const vixl32::MemOperand& src,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
@@ -9028,7 +9155,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
Location index,
Location temp,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -9094,7 +9221,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+ if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
// The following condition is a run-time one; it is executed after the
// previous compile-time test, to avoid penalizing non-debug builds.
if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
@@ -9124,7 +9251,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -9150,7 +9277,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -9165,7 +9292,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio
void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
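
The recurring edit in the hunks above swaps the compile-time constant kEmitCompilerReadBarrier for the global gUseReadBarrier (and kCompilerReadBarrierOption for gCompilerReadBarrierOption), so the read-barrier decision is no longer folded away when the compiler is built. A minimal, self-contained sketch of that shift; the enum names match the diff, but the globals and the CompilerReadBarrierOption() helper here are placeholders, not the real ART declarations:

  // Sketch only; the real declarations live in ART's runtime/compiler headers.
  enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

  // Before: a constexpr flag, so read-barrier-only paths could be eliminated statically.
  // static constexpr bool kEmitCompilerReadBarrier = ...;

  // After: a mutable global, set once when the runtime picks its collector,
  // which the code generator now tests while emitting code.
  bool gUseReadBarrier = false;

  ReadBarrierOption CompilerReadBarrierOption() {  // hypothetical helper
    return gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
  }
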
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 790ad0f8f7..f5abe6951a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
#include "base/enums.h"
+#include "base/macros.h"
#include "class_root.h"
#include "code_generator.h"
#include "common_arm.h"
@@ -36,7 +37,7 @@
#include "aarch32/macro-assembler-aarch32.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace linker {
class Thumb2RelativePatcherTest;
@@ -84,7 +85,7 @@ static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::Regis
vixl::aarch32::r6,
vixl::aarch32::r7),
// Do not consider r8 as a callee-save register with Baker read barriers.
- ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
+ (kReserveMarkingRegister
? vixl::aarch32::RegisterList()
: vixl::aarch32::RegisterList(vixl::aarch32::r8)),
vixl::aarch32::RegisterList(vixl::aarch32::r10,
@@ -118,6 +119,65 @@ class CodeGeneratorARMVIXL;
using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>;
using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>;
+#define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \
+ V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? */ \
+ V(UnsafeCASLong) /* High register pressure */ \
+ V(SystemArrayCopyChar) \
+ V(LongDivideUnsigned) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Floor) \
+ V(FP16Ceil) \
+ V(FP16Rint) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Compare) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(MathMultiplyHigh) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ V(SystemArrayCopyByte) \
+ V(SystemArrayCopyInt) \
+ /* 1.8 */ \
+ V(MathFmaDouble) \
+ V(MathFmaFloat) \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeCASLong) /* High register pressure */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject) \
+ V(JdkUnsafeCompareAndSetLong)
+
class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
public:
explicit JumpTableARMVIXL(HPackedSwitch* switch_instr)
@@ -309,7 +369,9 @@ class LocationsBuilderARMVIXL : public HGraphVisitor {
void HandleIntegerRotate(LocationSummary* locations);
void HandleLongRotate(LocationSummary* locations);
void HandleShift(HBinaryOperation* operation);
- void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
Location ArithmeticZeroOrFpuRegister(HInstruction* input);
@@ -378,7 +440,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
@@ -542,7 +605,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Register card,
vixl::aarch32::Register object,
vixl::aarch32::Register value,
- bool value_can_be_null);
+ bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -602,7 +665,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
struct PcRelativePatchInfo {
PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
: target_dex_file(dex_file), offset_or_index(off_or_idx) { }
- PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
// Target dex file or null for .data.bmig.rel.ro patches.
const DexFile* target_dex_file;
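
UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) in the hunk above is an X-macro: each V(Name) entry is expanded with whatever V is defined to mean at the expansion site (the TRUE_OVERRIDE expansion appears later, in the x86 and x86_64 .cc hunks). A reduced, runnable sketch of the pattern, using an illustrative two-entry list and a made-up fallback function rather than the real ART names:

  #include <cstdio>

  // Illustrative list; the real ARM list is the macro in the hunk above.
  #define EXAMPLE_UNIMPLEMENTED_LIST(V) \
    V(MathRoundDouble)                  \
    V(CRC32Update)

  // One possible expansion: stamp out a fallback visitor per entry.
  #define DECLARE_FALLBACK(Name) \
    void Visit##Name() { std::printf("%s: no handcrafted code, call the runtime\n", #Name); }
  EXAMPLE_UNIMPLEMENTED_LIST(DECLARE_FALLBACK)
  #undef DECLARE_FALLBACK

  int main() {
    VisitMathRoundDouble();
    VisitCRC32Update();
    return 0;
  }
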
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
new file mode 100644
index 0000000000..405b39aa0a
--- /dev/null
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
+
+#include "code_generator.h"
+#include "driver/compiler_options.h"
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index abec26464a..99805928e4 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -20,7 +20,7 @@
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
int64_t* magic, int* shift) {
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 64665adc15..9d9ab2b118 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -21,7 +21,9 @@
#include <cstdlib>
#include <limits>
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HInstruction;
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index 0fe9898635..6b6e25cf0c 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -23,7 +23,7 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
using helpers::DRegisterFrom;
@@ -65,7 +65,7 @@ inline Location NEONEncodableConstantOrRegister(HInstruction* constant,
HInstruction* instr) {
if (constant->IsConstant()
&& NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -94,7 +94,7 @@ void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* ins
case DataType::Type::kFloat64:
if (input->IsConstant() &&
NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
- locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(input));
locations->SetOut(Location::RequiresFpuRegister());
} else {
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -881,7 +881,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1008,13 +1008,13 @@ void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction)
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index 824b6c9476..fe15791d3f 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -23,17 +23,14 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
using helpers::DRegisterFrom;
-using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
-using helpers::QRegisterFrom;
-using helpers::StackOperandFrom;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::ZRegisterFrom;
@@ -67,7 +64,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
if (constant->IsConstant()
&& SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -96,7 +93,7 @@ void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* inst
case DataType::Type::kFloat64:
if (input->IsConstant() &&
SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
- locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(input));
locations->SetOut(Location::RequiresFpuRegister());
} else {
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -754,7 +751,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -878,13 +875,13 @@ void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index c46f9b7986..e8ecf28386 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -20,7 +20,7 @@
namespace vixl32 = vixl::aarch32;
using namespace vixl32; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm {
using helpers::DRegisterFrom;
@@ -640,7 +640,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt16:
case DataType::Type::kInt32:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -749,7 +749,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 9c837dd986..343a6e1af4 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -19,7 +19,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
@@ -42,13 +42,13 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(is_zero ? Location::RequiresFpuRegister()
: Location::SameAsFirstInput());
@@ -981,7 +981,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -1094,13 +1094,13 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 330bf76a4a..fb6e4e753f 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -19,7 +19,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
@@ -37,13 +37,13 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(is_zero ? Location::RequiresFpuRegister()
: Location::SameAsFirstInput());
@@ -964,7 +964,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -1072,13 +1072,13 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
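
Across all of the vector code generators, the Location::ConstantLocation() call sites drop the explicit ->AsConstant(), which suggests the helper now accepts the HInstruction* and performs the constant check and downcast itself. A small sketch of that API shape, using hypothetical stand-in types rather than ART's HInstruction, HConstant, and Location:

  #include <cassert>

  struct Constant { int value; };

  struct Instruction {
    Constant* constant = nullptr;
    bool IsConstant() const { return constant != nullptr; }
    Constant* AsConstant() const { return constant; }
  };

  struct Location {
    // The downcast moves behind the API, so callers pass the instruction directly.
    static Location ConstantLocation(const Instruction* insn) {
      assert(insn->IsConstant());
      return Location{insn->AsConstant()};
    }
    const Constant* payload;
  };

  // Usage at a call site: Location loc = Location::ConstantLocation(some_instruction);
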
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8c6b8027cd..cb1cecc45a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -20,7 +20,6 @@
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -36,6 +35,7 @@
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
+#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
@@ -43,7 +43,7 @@
#include "utils/x86/assembler_x86.h"
#include "utils/x86/managed_register_x86.h"
-namespace art {
+namespace art HIDDEN {
template<class MirrorType>
class GcRoot;
@@ -503,7 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction),
ref_(ref),
unpoison_ref_before_marking_(unpoison_ref_before_marking) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
@@ -590,7 +590,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
field_addr_(field_addr),
unpoison_ref_before_marking_(unpoison_ref_before_marking),
temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
@@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -918,7 +918,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
: SlowPathCode(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -967,6 +967,9 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ movl(EBX, Immediate(codegen->GetFrameSize()));
+ }
x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
@@ -1103,6 +1106,33 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ fs()->call(Address::Absolute(entry_point_offset));
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -1115,7 +1145,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
| (1 << kFakeReturnRegister),
0,
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -1197,9 +1228,21 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
+ if (instruction->IsMethodExitHook()) {
+ // Check whether the caller needs a deoptimization. Strictly speaking, it would be sufficient to
+ // check whether the CheckCallerForDeopt bit is set, but checking for any non-zero value is
+ // faster. The kCHA bit isn't used in debuggable runtimes because CHA optimization is disabled
+ // there. The other bit is set when this method itself requires a deoptimization due to
+ // redefinition, so it is safe to simply check for a non-zero value here.
+ __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+
uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
- __ cmpb(Address::Absolute(address + offset), Immediate(0));
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
+ __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -1261,6 +1304,44 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
void CodeGeneratorX86::GenerateFrameEntry() {
__ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ NearLabel continue_execution, resolution;
+ // We'll use EBP as temporary.
+ __ pushl(EBP);
+ // Check if we're visibly initialized.
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_visibly_initialized_value));
+ __ j(kAboveEqual, &continue_execution);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_initializing_value));
+ __ j(kBelow, &resolution);
+
+ __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
+ __ j(kEqual, &continue_execution);
+ __ Bind(&resolution);
+
+ __ popl(EBP);
+ // Jump to the resolution stub.
+ ThreadOffset32 entrypoint_offset =
+ GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
+ __ fs()->jmp(Address::Absolute(entrypoint_offset));
+
+ __ Bind(&continue_execution);
+ __ popl(EBP);
+ }
+
__ Bind(&frame_entry_label_);
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -1619,7 +1700,7 @@ void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
__ movsd(dst.AsFpuRegister<XmmRegister>(), src);
break;
case DataType::Type::kReference:
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!gUseReadBarrier);
__ movl(dst.AsRegister<Register>(), src);
__ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
break;
@@ -2230,12 +2311,12 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderX86::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorX86::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
@@ -2913,7 +2994,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
case DataType::Type::kInt64: {
HInstruction* input = conversion->InputAt(0);
Location input_location = input->IsConstant()
- ? Location::ConstantLocation(input->AsConstant())
+ ? Location::ConstantLocation(input)
: Location::RegisterPairLocation(EAX, EDX);
locations->SetInAt(0, input_location);
// Make the output overlap to please the register allocator. This greatly simplifies
@@ -5689,13 +5770,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke
DCHECK_EQ(size, linker_patches->size());
}
-void CodeGeneratorX86::MarkGCCard(Register temp,
- Register card,
- Register object,
- Register value,
- bool value_can_be_null) {
+void CodeGeneratorX86::MarkGCCard(
+ Register temp, Register card, Register object, Register value, bool emit_null_check) {
NearLabel is_null;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ testl(value, value);
__ j(kEqual, &is_null);
}
@@ -5720,7 +5798,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp,
// (no need to explicitly load `kCardDirty` as an immediate value).
__ movb(Address(temp, card, TIMES_1, 0),
X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&is_null);
}
}
@@ -5731,11 +5809,11 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
instruction->IsPredicatedInstanceFieldGet());
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
- kEmitCompilerReadBarrier
+ gUseReadBarrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
@@ -5793,7 +5871,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
if (load_type == DataType::Type::kReference) {
// /* HeapReference<Object> */ out = *(base + offset)
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -5824,7 +5902,9 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
}
}
-void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
+void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations =
@@ -5861,10 +5941,13 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
+ if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ locations->AddTemp(Location::RequiresRegister());
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ } else if (kPoisonHeapReferences) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
}
@@ -5875,7 +5958,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
Address field_addr,
Register base,
bool is_volatile,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
LocationSummary* locations = instruction->GetLocations();
Location value = locations->InAt(value_index);
bool needs_write_barrier =
@@ -5988,10 +6072,15 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (needs_write_barrier) {
+ if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) {
Register temp = locations->GetTemp(0).AsRegister<Register>();
Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
+ codegen_->MarkGCCard(
+ temp,
+ card,
+ base,
+ value.AsRegister<Register>(),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_volatile) {
@@ -6001,7 +6090,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations = instruction->GetLocations();
@@ -6026,7 +6116,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
field_addr,
base,
is_volatile,
- value_can_be_null);
+ value_can_be_null,
+ write_barrier_kind);
if (is_predicated) {
__ Bind(&pred_is_null);
@@ -6042,19 +6133,25 @@ void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instructi
}
void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
@@ -6202,7 +6299,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -6244,7 +6341,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -6315,10 +6412,12 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
+ // Used for reference poisoning or for emitting the write barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ // Only used when emitting a write barrier. Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ }
}
}
@@ -6435,9 +6534,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
}
}
- Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(
- temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ Register card = locations->GetTemp(1).AsRegister<Register>();
+ codegen_->MarkGCCard(temp,
+ card,
+ array,
+ value.AsRegister<Register>(),
+ /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsLinked());
@@ -7057,7 +7163,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -7071,7 +7177,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7109,7 +7215,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
bool generate_null_check = false;
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
DCHECK(!cls->CanCallRuntime());
@@ -7233,12 +7339,6 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
SlowPathCode* slow_path, Register class_reg) {
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- const size_t status_byte_offset =
- mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
__ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -7296,7 +7396,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7345,7 +7445,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
// No need for memory fence, thanks to the x86 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
@@ -7365,7 +7465,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
return;
}
default:
@@ -7416,7 +7516,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -7466,9 +7566,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -7734,9 +7834,9 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
} else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -8188,7 +8288,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
Register out_reg = out.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -8222,7 +8322,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -8250,7 +8350,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
Register root_reg = root.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barriers are used:
@@ -8314,7 +8414,7 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Register obj,
uint32_t offset,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
@@ -8328,7 +8428,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -8347,7 +8447,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
bool needs_null_check,
bool always_update_field,
Register* temp) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// In slow path based read barriers, the read barrier call is
@@ -8428,7 +8528,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -8455,7 +8555,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -8470,7 +8570,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
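
The detail::IsUnimplemented block added to code_generator_x86.cc (and its x86_64 counterpart below) turns the UNIMPLEMENTED_INTRINSIC_LIST into a compile-time boolean table: the primary template defaults to "implemented", TRUE_OVERRIDE specializes it to true for every listed intrinsic, and the INTRINSICS_LIST expansion stamps one entry per intrinsic into kIsIntrinsicUnimplemented, which is then handed to the CodeGenerator constructor. A reduced sketch of the same mechanism with a made-up three-value enum and example lists (the real lists are the macros in this diff):

  enum class Intrinsics { kNone, kMathRoundDouble, kStringLength };

  // Primary template: assume the backend has handcrafted code.
  template <Intrinsics T>
  struct IsUnimplemented { static constexpr bool value = false; };

  // Flip the entries named in the "unimplemented" X-macro list.
  #define EXAMPLE_UNIMPLEMENTED_LIST(V) V(MathRoundDouble)
  #define TRUE_OVERRIDE(Name) \
    template <> struct IsUnimplemented<Intrinsics::k##Name> { static constexpr bool value = true; };
  EXAMPLE_UNIMPLEMENTED_LIST(TRUE_OVERRIDE)
  #undef TRUE_OVERRIDE

  // The full intrinsic list stamps one bool per intrinsic, with kNone first.
  #define EXAMPLE_INTRINSICS_LIST(V) V(MathRoundDouble) V(StringLength)
  static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
  #define IS_UNIMPLEMENTED(Name) IsUnimplemented<Intrinsics::k##Name>::value,
    EXAMPLE_INTRINSICS_LIST(IS_UNIMPLEMENTED)
  #undef IS_UNIMPLEMENTED
  };

  static_assert(kIsIntrinsicUnimplemented[1] && !kIsIntrinsicUnimplemented[2],
                "MathRoundDouble is flagged as unimplemented, StringLength is not");
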
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 75c5cebb5e..d27155f31d 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -19,6 +19,7 @@
#include "arch/x86/instruction_set_features_x86.h"
#include "base/enums.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "dex/dex_file_types.h"
#include "driver/compiler_options.h"
@@ -26,7 +27,7 @@
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
// Use a local definition to prevent copying mistakes.
@@ -47,6 +48,61 @@ static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
+#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \
+ V(MathRoundDouble) \
+ V(FloatIsInfinite) \
+ V(DoubleIsInfinite) \
+ V(IntegerHighestOneBit) \
+ V(LongHighestOneBit) \
+ V(LongDivideUnsigned) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Floor) \
+ V(FP16Ceil) \
+ V(FP16Rint) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Compare) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(MathMultiplyHigh) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ /* 1.8 */ \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject)
+
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -196,7 +252,9 @@ class LocationsBuilderX86 : public HGraphVisitor {
void HandleInvoke(HInvoke* invoke);
void HandleCondition(HCondition* condition);
void HandleShift(HBinaryOperation* instruction);
- void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
bool CpuHasAvxFeatureFlag();
bool CpuHasAvx2FeatureFlag();
@@ -249,7 +307,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
Address field_addr,
Register base,
bool is_volatile,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
private:
// Generate code for the given suspend check. If not null, `successor`
@@ -279,7 +338,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
// Generate a heap reference load using one register `out`:
@@ -519,11 +579,8 @@ class CodeGeneratorX86 : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
// Emit a write barrier.
- void MarkGCCard(Register temp,
- Register card,
- Register object,
- Register value,
- bool value_can_be_null);
+ void MarkGCCard(
+ Register temp, Register card, Register object, Register value, bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
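
MarkGCCard's last parameter is renamed from value_can_be_null to emit_null_check because the decision now also depends on the WriteBarrierKind that the reworked HandleFieldSet/VisitArraySet paths pass down: kDontEmit skips the card mark entirely, and kEmitNoNullCheck emits it without re-checking a value that was already null-checked. A schematic of that control flow; the enum values come from the diff, while the two Emit* functions are placeholders for the real x86 assembler sequences:

  #include <cstdio>

  enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

  // Placeholders for the assembler sequences emitted in the hunks above.
  void EmitNullSkipBranch() { std::puts("test value, value; jump over the card mark if zero"); }
  void EmitCardTableStore() { std::puts("store the card byte for the written object"); }

  void MaybeMarkGCCard(bool value_can_be_null, WriteBarrierKind kind) {
    if (kind == WriteBarrierKind::kDontEmit) {
      return;  // the compiler decided this store needs no card mark at all
    }
    // Re-check for null only if the value may be null and no earlier check covered it.
    bool emit_null_check =
        value_can_be_null && kind == WriteBarrierKind::kEmitWithNullCheck;
    if (emit_null_check) {
      EmitNullSkipBranch();
    }
    EmitCardTableStore();
  }
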
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 511917a735..eea6b204fa 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -21,7 +21,6 @@
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
@@ -37,6 +36,7 @@
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "mirror/var_handle.h"
+#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
@@ -45,7 +45,7 @@
#include "utils/x86_64/constants_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"
-namespace art {
+namespace art HIDDEN {
template<class MirrorType>
class GcRoot;
@@ -510,7 +510,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction),
ref_(ref),
unpoison_ref_before_marking_(unpoison_ref_before_marking) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
@@ -601,7 +601,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
unpoison_ref_before_marking_(unpoison_ref_before_marking),
temp1_(temp1),
temp2_(temp2) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override {
@@ -761,7 +761,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial
// object has been overwritten by (or after) the heap object
// reference load to be instrumented, e.g.:
@@ -937,7 +937,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
: SlowPathCode(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -986,6 +986,10 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ // Load FrameSize to pass to the exit hook.
+ __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
+ }
x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
@@ -1490,6 +1494,33 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
@@ -1506,7 +1537,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
arraysize(kFpuCalleeSaves)),
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -1561,9 +1593,22 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
codegen_->AddSlowPath(slow_path);
+ if (instruction->IsMethodExitHook()) {
+    // Check whether we need to check if the caller requires a deoptimization. Strictly speaking,
+    // it would be sufficient to check whether the CheckCallerForDeopt bit is set, but it is
+    // faster to check for any non-zero value. The kCHA bit isn't used in debuggable runtimes
+    // because CHA optimization is disabled there. The other bit is used when this method itself
+    // requires a deoptimization due to redefinition, so it is safe to just check for a non-zero
+    // value here.
+ __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
+ Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+
uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
- __ movq(CpuRegister(TMP), Immediate(address + offset));
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset()
+ : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
+ __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
__ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
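The generated check above reduces to: for exit hooks, take the slow path if the per-frame ShouldDeoptimize flag is non-zero; then read one byte of the global Instrumentation object (the entry- or exit-listener flag, depending on the hook) and take the slow path if it is set. A C++ sketch of that decision, with a hypothetical struct standing in for the real Instrumentation offsets:

#include <cstdint>

// Hypothetical stand-in for the two byte-sized listener flags read from
// art::instrumentation::Instrumentation; the real offsets come from
// HaveMethodEntryListenersOffset()/HaveMethodExitListenersOffset().
struct InstrumentationFlags {
  uint8_t have_method_entry_listeners;
  uint8_t have_method_exit_listeners;
};

bool NeedsEntryExitSlowPath(bool is_exit_hook,
                            uint32_t should_deoptimize_flag,  // per-frame flag at a fixed stack offset
                            const InstrumentationFlags& inst) {
  // Exit hooks additionally honor the per-frame ShouldDeoptimize flag; any non-zero value
  // (CHA or redefinition bit) routes through the slow path.
  if (is_exit_hook && should_deoptimize_flag != 0) {
    return true;
  }
  uint8_t listeners = is_exit_hook ? inst.have_method_exit_listeners
                                   : inst.have_method_entry_listeners;
  return listeners != 0;  // Any registered listener forces the runtime call.
}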
@@ -1653,6 +1698,44 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
void CodeGeneratorX86_64::GenerateFrameEntry() {
__ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ NearLabel resolution;
+ // Check if we're visibly initialized.
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ movl(CpuRegister(TMP),
+ Address(CpuRegister(kMethodRegisterArgument),
+ ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ cmpb(Address(CpuRegister(TMP), status_byte_offset),
+ Immediate(shifted_visibly_initialized_value));
+ __ j(kAboveEqual, &frame_entry_label_);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ cmpb(Address(CpuRegister(TMP), status_byte_offset), Immediate(shifted_initializing_value));
+ __ j(kBelow, &resolution);
+
+ __ movl(CpuRegister(TMP),
+ Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ gs()->cmpl(
+ CpuRegister(TMP),
+ Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
+ __ j(kEqual, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset64 entrypoint_offset =
+ GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
+ __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
+ }
+
__ Bind(&frame_entry_label_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
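The clinit check emitted at frame entry above encodes a three-way decision: run the compiled body if the class is visibly initialized, also run it if the class is currently initializing and the initializing thread is this thread, and otherwise jump to the quick resolution trampoline. A minimal sketch of that decision, assuming a simplified status enum and plain integer thread ids:

#include <cstdint>

// Simplified stand-in for art::ClassStatus; only the ordering matters for this check.
enum class ClassStatus : uint8_t { kResolved, kInitializing, kVisiblyInitialized };

// Returns true if the compiled code may be entered directly; false means the frame entry
// should tail-call the quick resolution trampoline instead.
bool CanEnterCompiledCode(ClassStatus status, uint32_t clinit_thread_id, uint32_t self_tid) {
  if (status >= ClassStatus::kVisiblyInitialized) {
    return true;  // Initialization finished and is visible to every thread.
  }
  if (status < ClassStatus::kInitializing) {
    return false;  // Not even initializing yet: the runtime must resolve/initialize first.
  }
  // Currently initializing: only the thread running <clinit> may execute the method.
  return clinit_thread_id == self_tid;
}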
@@ -2274,12 +2357,12 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderX86_64::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
@@ -5013,7 +5096,7 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
instruction->IsPredicatedInstanceFieldGet());
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
@@ -5064,7 +5147,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
if (load_type == DataType::Type::kReference) {
// /* HeapReference<Object> */ out = *(base + offset)
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -5119,6 +5202,9 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
}
}
+
+ // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
+ // InstructionCodeGeneratorX86_64::HandleFieldSet.
if (needs_write_barrier) {
// Temporary registers for the write barrier.
locations->AddTemp(Location::RequiresRegister());
@@ -5180,7 +5266,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
bool is_volatile,
bool is_atomic,
bool value_can_be_null,
- bool byte_swap) {
+ bool byte_swap,
+ WriteBarrierKind write_barrier_kind) {
LocationSummary* locations = instruction->GetLocations();
Location value = locations->InAt(value_index);
@@ -5298,10 +5385,16 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) {
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
- codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
+ codegen_->MarkGCCard(
+ temp,
+ card,
+ base,
+ value.AsRegister<CpuRegister>(),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_volatile) {
@@ -5311,7 +5404,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations = instruction->GetLocations();
@@ -5336,7 +5430,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
base,
is_volatile,
/*is_atomic=*/ false,
- value_can_be_null);
+ value_can_be_null,
+ /*byte_swap=*/ false,
+ write_barrier_kind);
if (is_predicated) {
__ Bind(&pred_is_null);
@@ -5348,7 +5444,10 @@ void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio
}
void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
@@ -5388,7 +5487,10 @@ void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
@@ -5513,7 +5615,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -5551,7 +5653,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -5619,9 +5721,12 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
}
if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
+ // Used by reference poisoning or emitting write barrier.
locations->AddTemp(Location::RequiresRegister());
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ // Only used when emitting a write barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5739,9 +5844,16 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
}
}
- CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
- codegen_->MarkGCCard(
- temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+ codegen_->MarkGCCard(temp,
+ card,
+ array,
+ value.AsRegister<CpuRegister>(),
+ /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsLinked());
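The new WriteBarrierKind argument lets the graph tell HandleFieldSet/VisitArraySet, per reference store, whether to emit the card mark with a null check, emit it unconditionally (the value was already null-checked, per the DCHECK above), or skip it because the compiler considers it redundant. A compact sketch of that decision; the free function, its parameters, and the stated reason for kDontEmit are illustrative assumptions, not ART API:

enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

void MaybeMarkGCCard(WriteBarrierKind kind,
                     const void* object,
                     const void* stored_value,
                     void (*mark_card)(const void* object)) {
  if (kind == WriteBarrierKind::kDontEmit) {
    return;  // The compiler decided this card mark is redundant (assumption: another store covers it).
  }
  if (kind == WriteBarrierKind::kEmitWithNullCheck && stored_value == nullptr) {
    return;  // Storing null never needs a card mark; kEmitNoNullCheck skips this test.
  }
  mark_card(object);  // Corresponds to CodeGeneratorX86_64::MarkGCCard.
}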
@@ -5940,9 +6052,9 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
CpuRegister card,
CpuRegister object,
CpuRegister value,
- bool value_can_be_null) {
+ bool emit_null_check) {
NearLabel is_null;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ testl(value, value);
__ j(kEqual, &is_null);
}
@@ -5967,7 +6079,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
// of the card to mark; and 2. to load the `kCardDirty` value) saves a load
// (no need to explicitly load `kCardDirty` as an immediate value).
__ movb(Address(temp, card, TIMES_1, 0), card);
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&is_null);
}
}
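MarkGCCard above is the classic card-table write barrier: the card index is the object address shifted right by the card size, and a byte store dirties that card. A plain C++ sketch with illustrative constants (ART's real card size and dirty value live in gc::accounting::CardTable):

#include <cstdint>
#include <cstddef>

// Illustrative parameters; not ART's exact values.
constexpr size_t kCardShift = 10;
constexpr uint8_t kCardDirty = 0x70;

// `biased_card_table` is card_table_begin minus (heap_begin >> kCardShift), so the raw object
// address selects the right slot without subtracting the heap base first.
void MarkCard(uint8_t* biased_card_table, const void* object) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(object);
  biased_card_table[addr >> kCardShift] = kCardDirty;
}

The emitted x86-64 code goes one step further: as the comment above notes, it stores the low byte of the biased card-table pointer itself, which the runtime arranges to equal the dirty value, so no separate kCardDirty immediate load is needed.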
@@ -6282,12 +6394,6 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
SlowPathCode* slow_path, CpuRegister class_reg) {
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- const size_t status_byte_offset =
- mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
__ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
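The constants deleted from GenerateClassInitializationCheck are the same ones GenerateFrameEntry now needs, so they have presumably been hoisted to a shared scope. Their arithmetic picks out the single byte holding the class status and the value that byte takes once the status is shifted into place; a worked sketch with hypothetical inputs (the real ones come from SubtypeCheckBits::BitStructSizeOf(), ClassStatus, and mirror::Class::StatusOffset()):

#include <cstdint>
#include <cstddef>

constexpr size_t kBitsPerByte = 8;

// Hypothetical values, chosen only to make the arithmetic concrete.
constexpr size_t kStatusLsbPosition = 28;
constexpr uint32_t kVisiblyInitialized = 14;
constexpr size_t kClassStatusOffset = 112;

constexpr size_t status_byte_offset =
    kClassStatusOffset + (kStatusLsbPosition / kBitsPerByte);        // byte that contains the status bits
constexpr uint32_t shifted_visibly_initialized_value =
    kVisiblyInitialized << (kStatusLsbPosition % kBitsPerByte);      // value as seen by a one-byte cmpb

static_assert(status_byte_offset == 115, "status lives in the top byte of the packed field");
static_assert(shifted_visibly_initialized_value == 224, "fits in the single byte that cmpb reads");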
@@ -6352,7 +6458,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -6366,7 +6472,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -6403,7 +6509,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -6550,7 +6656,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -6598,7 +6704,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
/* no_rip= */ false);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
// No need for memory fence, thanks to the x86-64 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
@@ -6619,7 +6725,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
return;
}
default:
@@ -6672,7 +6778,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -6722,9 +6828,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -7000,9 +7106,9 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
} else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -7426,7 +7532,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -7460,7 +7566,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -7488,7 +7594,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
CpuRegister root_reg = root.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -7552,7 +7658,7 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
CpuRegister obj,
uint32_t offset,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
@@ -7566,7 +7672,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -7586,7 +7692,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
bool always_update_field,
CpuRegister* temp1,
CpuRegister* temp2) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// In slow path based read barriers, the read barrier call is
@@ -7668,7 +7774,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -7695,7 +7801,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -7710,7 +7816,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction
void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 39a72d8211..dff2e799e0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -18,13 +18,14 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// Use a local definition to prevent copying mistakes.
@@ -52,6 +53,53 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
+#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Floor) \
+ V(FP16Ceil) \
+ V(FP16Rint) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Compare) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ /* 1.8 */ \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject)
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
@@ -250,7 +298,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
bool is_volatile,
bool is_atomic,
bool value_can_be_null,
- bool byte_swap = false);
+ bool byte_swap,
+ WriteBarrierKind write_barrier_kind);
void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);
@@ -273,7 +322,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
@@ -435,7 +485,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
CpuRegister card,
CpuRegister object,
CpuRegister value,
- bool value_can_be_null);
+ bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 766bb01978..d759a16f48 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -19,30 +19,55 @@
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
+#include "base/globals.h"
#include "base/logging.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "common_dominator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
bool CodeSinking::Run() {
- HBasicBlock* exit = graph_->GetExitBlock();
- if (exit == nullptr) {
+ if (graph_->GetExitBlock() == nullptr) {
// Infinite loop, just bail.
return false;
}
+
+ UncommonBranchSinking();
+ ReturnSinking();
+ return true;
+}
+
+void CodeSinking::UncommonBranchSinking() {
+ HBasicBlock* exit = graph_->GetExitBlock();
+ DCHECK(exit != nullptr);
// TODO(ngeoffray): we do not profile branches yet, so use throw instructions
// as an indicator of an uncommon branch.
for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
HInstruction* last = exit_predecessor->GetLastInstruction();
+
+    // TryBoundary instructions are sometimes inserted between the last instruction (e.g. Throw,
+    // Return) and Exit. We don't want to use a TryBoundary for our "uncommon branch" heuristic
+    // because it is not as good an indicator as a throwing branch, so we skip it and fetch the
+    // actual last instruction.
+ if (last->IsTryBoundary()) {
+ // We have an exit try boundary. Fetch the previous instruction.
+ DCHECK(!last->AsTryBoundary()->IsEntry());
+ if (last->GetPrevious() == nullptr) {
+ DCHECK(exit_predecessor->IsSingleTryBoundary());
+ exit_predecessor = exit_predecessor->GetSinglePredecessor();
+ last = exit_predecessor->GetLastInstruction();
+ } else {
+ last = last->GetPrevious();
+ }
+ }
+
// Any predecessor of the exit that does not return throws an exception.
if (!last->IsReturn() && !last->IsReturnVoid()) {
SinkCodeToUncommonBranch(exit_predecessor);
}
}
- return true;
}
static bool IsInterestingInstruction(HInstruction* instruction) {
@@ -88,7 +113,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) {
// We can only store on local allocations. Other heap references can
// be escaping. Note that allocations can escape too, but we only move
- // allocations if their users can move to, or are in the list of
+ // allocations if their users can move too, or are in the list of
// post dominated blocks.
if (instruction->IsInstanceFieldSet()) {
if (!instruction->InputAt(0)->IsNewInstance()) {
@@ -102,7 +127,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) {
}
}
- // Heap accesses cannot go pass instructions that have memory side effects, which
+ // Heap accesses cannot go past instructions that have memory side effects, which
// we are not tracking here. Note that the load/store elimination optimization
// runs before this optimization, and should have removed interesting ones.
// In theory, we could handle loads of local allocations, but this is currently
@@ -171,7 +196,6 @@ static bool ShouldFilterUse(HInstruction* instruction,
return false;
}
-
// Find the ideal position for moving `instruction`. If `filter` is true,
// we filter out store instructions to that instruction, which are processed
// first in the step (3) of the sinking algorithm.
@@ -210,56 +234,52 @@ static HInstruction* FindIdealPosition(HInstruction* instruction,
return nullptr;
}
- // Move to the first dominator not in a loop, if we can.
- while (target_block->IsInLoop()) {
+ // Move to the first dominator not in a loop, if we can. We only do this if we are trying to hoist
+ // `instruction` out of a loop it wasn't a part of.
+ const HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+ while (target_block->IsInLoop() && target_block->GetLoopInformation() != loop_info) {
if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
break;
}
target_block = target_block->GetDominator();
DCHECK(target_block != nullptr);
}
- const bool was_in_loop = target_block->IsInLoop();
-
- // For throwing instructions we can move them into:
- // * Blocks that are not part of a try
- // * Catch blocks are suitable as well, as long as they are not part of an outer try.
- // * Blocks that are part of the same try that the instrucion was already in.
- //
- // We cannot move an instruction that can throw into a try that said instruction is not a part of
- // already, as that would mean it will throw into a different catch block. If we detect that
- // `target_block` is not a valid block to move `instruction` to, we traverse up the dominator tree
- // to find if we have a suitable block.
- while (instruction->CanThrow() && target_block->GetTryCatchInformation() != nullptr) {
- if (target_block->IsCatchBlock()) {
- // If the catch block has an xhandler, it means it is inside of an outer try.
- const bool inside_of_another_try_catch = target_block->GetSuccessors().size() != 1;
- if (!inside_of_another_try_catch) {
- // If we have a catch block, it's okay to sink as long as that catch is not inside of
- // another try catch.
- break;
+
+ if (instruction->CanThrow()) {
+    // Consistency check: after traversing up the dominator tree to honor the try/catch
+    // constraints below, we shouldn't land in a loop if we weren't in one before.
+ const bool was_in_loop = target_block->IsInLoop();
+
+    // We cannot move an instruction that can throw into a try that said instruction is not a part
+    // of already, as that would mean it will throw into a different catch block. In short, for
+    // throwing instructions:
+    // * If the throwing instruction is part of a try, it should only be sunk into that same try.
+    // * If the throwing instruction is not part of any try, it shouldn't be sunk into any try.
+ if (instruction->GetBlock()->IsTryBlock()) {
+ const HTryBoundary& try_entry =
+ instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
+ while (!(target_block->IsTryBlock() &&
+ try_entry.HasSameExceptionHandlersAs(
+ target_block->GetTryCatchInformation()->GetTryEntry()))) {
+ target_block = target_block->GetDominator();
+ if (!post_dominated.IsBitSet(target_block->GetBlockId())) {
+ // We couldn't find a suitable block.
+ return nullptr;
+ }
}
} else {
- DCHECK(target_block->IsTryBlock());
- if (instruction->GetBlock()->IsTryBlock() &&
- instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry().GetId() ==
- target_block->GetTryCatchInformation()->GetTryEntry().GetId()) {
- // Sink within the same try block is allowed.
- break;
+      // Search for the first block that is also not in a try block.
+ while (target_block->IsTryBlock()) {
+ target_block = target_block->GetDominator();
+ if (!post_dominated.IsBitSet(target_block->GetBlockId())) {
+ // We couldn't find a suitable block.
+ return nullptr;
+ }
}
}
- // We are now in the case where we would be moving to a different try. Since we don't want
- // that, traverse up the dominator tree to find a suitable block.
- if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
- // We couldn't find a suitable block.
- return nullptr;
- }
- target_block = target_block->GetDominator();
- DCHECK(target_block != nullptr);
- }
- // We shouldn't land in a loop if we weren't in one before traversing up the dominator tree
- // regarding try catches.
- DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop);
+ DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop);
+ }
// Find insertion position. No need to filter anymore, as we have found a
// target block.
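The loop above enforces that a throwing instruction is only sunk within its own try region (or outside any try), because the enclosing try determines which handler catches it. A small source-level analogy of the hazard, illustrative only since the pass operates on HIR blocks rather than source:

#include <stdexcept>
#include <cstdio>

int MayThrow(int x) {
  if (x < 0) throw std::runtime_error("negative");
  return x * 2;
}

void Original(int x, bool rare) {
  int v = MayThrow(x);           // outside any try: an exception propagates to the caller
  try {
    if (rare) std::printf("%d\n", v);
  } catch (const std::runtime_error&) {
    std::printf("handled locally\n");  // must never observe MayThrow's exception
  }
}

// Sinking MayThrow(x) next to its single use would move it inside the try, so the local
// catch would start handling an exception that previously escaped to the caller. The pass
// therefore walks up the dominator tree until it leaves the foreign try, or gives up.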
@@ -271,10 +291,21 @@ static HInstruction* FindIdealPosition(HInstruction* instruction,
}
}
for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
- HInstruction* user = use.GetUser()->GetHolder();
+ HEnvironment* env = use.GetUser();
+ HInstruction* user = env->GetHolder();
if (user->GetBlock() == target_block &&
(insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
- insert_pos = user;
+ if (target_block->IsCatchBlock() && target_block->GetFirstInstruction() == user) {
+ // We can sink the instructions past the environment setting Nop. If we do that, we have to
+ // remove said instruction from the environment. Since we know that we will be sinking the
+ // instruction to this block and there are no more instructions to consider, we can safely
+ // remove it from the environment now.
+ DCHECK(target_block->GetFirstInstruction()->IsNop());
+ env->RemoveAsUserOfInput(use.GetIndex());
+ env->SetRawEnvAt(use.GetIndex(), /*instruction=*/ nullptr);
+ } else {
+ insert_pos = user;
+ }
}
}
if (insert_pos == nullptr) {
@@ -310,8 +341,8 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
// Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
- // TODO(ngeoffray): Getting the full set of post-dominated shoud be done by
- // computint the post dominator tree, but that could be too time consuming. Also,
+ // TODO(ngeoffray): Getting the full set of post-dominated should be done by
+ // computing the post dominator tree, but that could be too time consuming. Also,
// we should start the analysis from blocks dominated by an uncommon branch, but we
// don't profile branches yet.
bool found_block = false;
@@ -321,45 +352,43 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
post_dominated.SetBit(block->GetBlockId());
} else if (found_block) {
bool is_post_dominated = true;
- if (block->GetSuccessors().empty()) {
- // We currently bail for loops.
- is_post_dominated = false;
- } else {
- // BasicBlock that are try entries look like this:
- // BasicBlock i:
- // instr 1
- // ...
- // instr N
- // TryBoundary kind:entry ---Try begins here---
- //
- // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor
- // since we are starting a try. If we use `GetSuccessors` for this case, we will check if
- // the catch block is post_dominated.
- //
- // However, this catch block doesn't matter: when we sink the instruction into that
- // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the
- // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code
- // right before the start of a try block.
- //
- // On the other side of the coin, BasicBlock that are try exits look like this:
- // BasicBlock j:
- // instr 1
- // ...
- // instr N
- // TryBoundary kind:exit ---Try ends here---
- //
- // If we sink to these basic blocks we would be sinking inside of the try so we would like
- // to check the catch block for post dominance.
- const bool ends_with_try_boundary_entry =
- block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry();
- ArrayRef<HBasicBlock* const> successors =
- ends_with_try_boundary_entry ? block->GetNormalSuccessors() :
- ArrayRef<HBasicBlock* const>(block->GetSuccessors());
- for (HBasicBlock* successor : successors) {
- if (!post_dominated.IsBitSet(successor->GetBlockId())) {
- is_post_dominated = false;
- break;
- }
+ DCHECK_NE(block, graph_->GetExitBlock())
+ << "We shouldn't encounter the exit block after `end_block`.";
+
+ // BasicBlock that are try entries look like this:
+ // BasicBlock i:
+ // instr 1
+ // ...
+ // instr N
+ // TryBoundary kind:entry ---Try begins here---
+ //
+ // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor
+ // since we are starting a try. If we use `GetSuccessors` for this case, we will check if
+ // the catch block is post_dominated.
+ //
+ // However, this catch block doesn't matter: when we sink the instruction into that
+ // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the
+ // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code
+ // right before the start of a try block.
+ //
+ // On the other side of the coin, BasicBlock that are try exits look like this:
+ // BasicBlock j:
+ // instr 1
+ // ...
+ // instr N
+ // TryBoundary kind:exit ---Try ends here---
+ //
+ // If we sink to these basic blocks we would be sinking inside of the try so we would like
+ // to check the catch block for post dominance.
+ const bool ends_with_try_boundary_entry =
+ block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry();
+ ArrayRef<HBasicBlock* const> successors =
+ ends_with_try_boundary_entry ? block->GetNormalSuccessors() :
+ ArrayRef<HBasicBlock* const>(block->GetSuccessors());
+ for (HBasicBlock* successor : successors) {
+ if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+ is_post_dominated = false;
+ break;
}
}
if (is_post_dominated) {
@@ -509,4 +538,79 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
}
}
+void CodeSinking::ReturnSinking() {
+ HBasicBlock* exit = graph_->GetExitBlock();
+ DCHECK(exit != nullptr);
+
+ int number_of_returns = 0;
+ bool saw_return = false;
+ for (HBasicBlock* pred : exit->GetPredecessors()) {
+ // TODO(solanes): We might have Return/ReturnVoid->TryBoundary->Exit. We can theoretically
+ // handle them and move them out of the TryBoundary. However, it is a border case and it adds
+ // codebase complexity.
+ if (pred->GetLastInstruction()->IsReturn() || pred->GetLastInstruction()->IsReturnVoid()) {
+ saw_return |= pred->GetLastInstruction()->IsReturn();
+ ++number_of_returns;
+ }
+ }
+
+ if (number_of_returns < 2) {
+ // Nothing to do.
+ return;
+ }
+
+ // `new_block` will coalesce the Return instructions into Phi+Return, or the ReturnVoid
+ // instructions into a ReturnVoid.
+ HBasicBlock* new_block = new (graph_->GetAllocator()) HBasicBlock(graph_, exit->GetDexPc());
+ if (saw_return) {
+ HPhi* new_phi = nullptr;
+ for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) {
+ HBasicBlock* pred = exit->GetPredecessors()[i];
+ if (!pred->GetLastInstruction()->IsReturn()) {
+ ++i;
+ continue;
+ }
+
+ HReturn* ret = pred->GetLastInstruction()->AsReturn();
+ if (new_phi == nullptr) {
+ // Create the new_phi, if we haven't done so yet. We do it here since we need to know the
+ // type to assign to it.
+ new_phi = new (graph_->GetAllocator()) HPhi(graph_->GetAllocator(),
+ kNoRegNumber,
+ /*number_of_inputs=*/0,
+ ret->InputAt(0)->GetType());
+ new_block->AddPhi(new_phi);
+ }
+ new_phi->AddInput(ret->InputAt(0));
+ pred->ReplaceAndRemoveInstructionWith(ret,
+ new (graph_->GetAllocator()) HGoto(ret->GetDexPc()));
+ pred->ReplaceSuccessor(exit, new_block);
+ // Since we are removing a predecessor, there's no need to increment `i`.
+ }
+ new_block->AddInstruction(new (graph_->GetAllocator()) HReturn(new_phi, exit->GetDexPc()));
+ } else {
+ for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) {
+ HBasicBlock* pred = exit->GetPredecessors()[i];
+ if (!pred->GetLastInstruction()->IsReturnVoid()) {
+ ++i;
+ continue;
+ }
+
+ HReturnVoid* ret = pred->GetLastInstruction()->AsReturnVoid();
+ pred->ReplaceAndRemoveInstructionWith(ret,
+ new (graph_->GetAllocator()) HGoto(ret->GetDexPc()));
+ pred->ReplaceSuccessor(exit, new_block);
+ // Since we are removing a predecessor, there's no need to increment `i`.
+ }
+ new_block->AddInstruction(new (graph_->GetAllocator()) HReturnVoid(exit->GetDexPc()));
+ }
+
+ new_block->AddSuccessor(exit);
+ graph_->AddBlock(new_block);
+
+ // Recompute dominance since we added a new block.
+ graph_->ClearDominanceInformation();
+ graph_->ComputeDominanceInformation();
+}
+
} // namespace art
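ReturnSinking above rewrites the return-ending predecessors of Exit into Gotos feeding one new block with a single Phi and a single Return (or just a ReturnVoid), so the frame-exit sequence is generated once. In source terms the transformation has roughly this shape; the sketch shows the intent, not the HIR API:

// Before: two Return instructions, each of which would get its own frame-exit sequence.
int BeforeSinking(bool c, int a, int b) {
  if (c) {
    return a;
  }
  return b;
}

// After: both predecessors end in a Goto to one new block holding a Phi and a single Return,
// so the epilogue is emitted exactly once.
int AfterSinking(bool c, int a, int b) {
  int result;        // becomes the Phi over {a, b}
  if (c) {
    result = a;      // Goto to the merge block
  } else {
    result = b;      // Goto to the merge block
  }
  return result;     // the coalesced Return
}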
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
index 8eb3a520c3..c743db40d9 100644
--- a/compiler/optimizing/code_sinking.h
+++ b/compiler/optimizing/code_sinking.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization pass to move instructions into uncommon branches,
@@ -38,10 +39,16 @@ class CodeSinking : public HOptimization {
static constexpr const char* kCodeSinkingPassName = "code_sinking";
private:
- // Try to move code only used by `end_block` and all its post-dominated / dominated
+ // Tries to sink code to uncommon branches.
+ void UncommonBranchSinking();
+ // Tries to move code only used by `end_block` and all its post-dominated / dominated
// blocks, to these blocks.
void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+ // Coalesces the Return/ReturnVoid instructions into one, if we have two or more. We do this to
+ // avoid generating the exit frame code several times.
+ void ReturnSinking();
+
DISALLOW_COPY_AND_ASSIGN(CodeSinking);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index c0441b07ed..2d9acc49b3 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -33,7 +33,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
// Return all combinations of ISA and code generator that are executable on
// hardware, or on simulator, and that we'd like to test.
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class CodegenTest : public OptimizingUnitTest {
+class CodegenTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0);
void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected);
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 397e601cee..7af9d0f44c 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -20,6 +20,7 @@
#include "arch/arm/registers_arm.h"
#include "arch/instruction_set.h"
#include "arch/x86/registers_x86.h"
+#include "base/macros.h"
#include "code_simulator.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
@@ -35,6 +36,10 @@
#include "code_generator_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "code_generator_riscv64.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
#include "code_generator_x86.h"
#endif
@@ -43,9 +48,9 @@
#include "code_generator_x86_64.h"
#endif
-namespace art {
+namespace art HIDDEN {
-typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
+using CreateCodegenFn = CodeGenerator* (*)(HGraph*, const CompilerOptions&);
class CodegenTargetConfig {
public:
@@ -254,15 +259,11 @@ static void Run(const InternalCodeAllocator& allocator,
Runtime* GetRuntime() override { return nullptr; }
};
CodeHolder code_holder;
- const void* code_ptr =
+ const void* method_code =
code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa);
- typedef Expected (*fptr)();
- fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(code_ptr));
- if (target_isa == InstructionSet::kThumb2) {
- // For thumb we need the bottom bit set.
- f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
- }
+ using fptr = Expected (*)();
+ fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code));
VerifyGeneratedCode(target_isa, f, has_result, expected);
}
@@ -332,6 +333,10 @@ inline CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions&
}
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+inline CodeGenerator* create_codegen_riscv64(HGraph*, const CompilerOptions&) { return nullptr; }
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
inline CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options);
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 320915ee57..5f71cb906c 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
+#include "base/macros.h"
#include "instruction_simplifier_shared.h"
#include "locations.h"
#include "nodes.h"
@@ -28,7 +29,7 @@
#include "aarch32/macro-assembler-aarch32.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
using helpers::HasShifterOperand;
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 81c6561318..20b0e38af5 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
+#include "base/macros.h"
#include "code_generator.h"
#include "instruction_simplifier_shared.h"
#include "locations.h"
@@ -31,7 +32,7 @@
#include "aarch64/simulator-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
@@ -153,7 +154,7 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst
int index) {
HInstruction* input = instr->InputAt(index);
DataType::Type input_type = input->GetType();
- if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) {
+ if (IsZeroBitPattern(input)) {
return (DataType::Size(input_type) >= vixl::aarch64::kXRegSizeInBytes)
? vixl::aarch64::Register(vixl::aarch64::xzr)
: vixl::aarch64::Register(vixl::aarch64::wzr);
@@ -314,7 +315,7 @@ inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
HInstruction* instr) {
if (constant->IsConstant()
&& Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -380,10 +381,6 @@ inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
return instruction->IsAdd() || instruction->IsSub();
}
-inline bool IsConstantZeroBitPattern(const HInstruction* instruction) {
- return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
-}
-
} // namespace helpers
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index 9f012cfbb2..f01270ee4a 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Helper class for finding common dominators of two or more blocks in a graph.
// The domination information of a graph must not be modified while there is
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 2031707759..06d19e3f29 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -16,14 +16,20 @@
#include "constant_folding.h"
-namespace art {
+#include <algorithm>
+
+#include "dex/dex_file-inl.h"
+#include "optimizing/data_type.h"
+#include "optimizing/nodes.h"
+
+namespace art HIDDEN {
// This visitor tries to simplify instructions that can be evaluated
// as constants.
-class HConstantFoldingVisitor : public HGraphDelegateVisitor {
+class HConstantFoldingVisitor final : public HGraphDelegateVisitor {
public:
- explicit HConstantFoldingVisitor(HGraph* graph)
- : HGraphDelegateVisitor(graph) {}
+ HConstantFoldingVisitor(HGraph* graph, OptimizingCompilerStats* stats, bool use_all_optimizations)
+ : HGraphDelegateVisitor(graph, stats), use_all_optimizations_(use_all_optimizations) {}
private:
void VisitBasicBlock(HBasicBlock* block) override;
@@ -31,8 +37,15 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor {
void VisitUnaryOperation(HUnaryOperation* inst) override;
void VisitBinaryOperation(HBinaryOperation* inst) override;
- void VisitTypeConversion(HTypeConversion* inst) override;
+ void VisitArrayLength(HArrayLength* inst) override;
void VisitDivZeroCheck(HDivZeroCheck* inst) override;
+ void VisitIf(HIf* inst) override;
+ void VisitTypeConversion(HTypeConversion* inst) override;
+
+ void PropagateValue(HBasicBlock* starting_block, HInstruction* variable, HConstant* constant);
+
+ // Use all optimizations without restrictions.
+ bool use_all_optimizations_;
DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
};
@@ -55,6 +68,11 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
void VisitBelow(HBelow* instruction) override;
void VisitBelowOrEqual(HBelowOrEqual* instruction) override;
+ void VisitGreaterThan(HGreaterThan* instruction) override;
+ void VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) override;
+ void VisitLessThan(HLessThan* instruction) override;
+ void VisitLessThanOrEqual(HLessThanOrEqual* instruction) override;
+
void VisitAnd(HAnd* instruction) override;
void VisitCompare(HCompare* instruction) override;
void VisitMul(HMul* instruction) override;
@@ -69,7 +87,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
bool HConstantFolding::Run() {
- HConstantFoldingVisitor visitor(graph_);
+ HConstantFoldingVisitor visitor(graph_, stats_, use_all_optimizations_);
// Process basic blocks in reverse post-order in the dominator tree,
// so that an instruction turned into a constant, used as input of
// another instruction, may possibly be used to turn that second
@@ -111,16 +129,6 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
}
}
-void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
- // Constant folding: replace `TypeConversion(a)' with a constant at
- // compile time if `a' is a constant.
- HConstant* constant = inst->TryStaticEvaluation();
- if (constant != nullptr) {
- inst->ReplaceWith(constant);
- inst->GetBlock()->RemoveInstruction(inst);
- }
-}
-
void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
// We can safely remove the check if the input is a non-null constant.
HInstruction* check_input = inst->InputAt(0);
@@ -130,6 +138,169 @@ void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
}
}
+void HConstantFoldingVisitor::PropagateValue(HBasicBlock* starting_block,
+ HInstruction* variable,
+ HConstant* constant) {
+ const bool recording_stats = stats_ != nullptr;
+ size_t uses_before = 0;
+ size_t uses_after = 0;
+ if (recording_stats) {
+ uses_before = variable->GetUses().SizeSlow();
+ }
+
+ if (variable->GetUses().HasExactlyOneElement()) {
+ // Nothing to do, since we only have the `if (variable)` use or the `condition` use.
+ return;
+ }
+
+ variable->ReplaceUsesDominatedBy(
+ starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false);
+
+ if (recording_stats) {
+ uses_after = variable->GetUses().SizeSlow();
+ DCHECK_GE(uses_after, 1u) << "we must at least have the use in the if clause.";
+ DCHECK_GE(uses_before, uses_after);
+ MaybeRecordStat(stats_, MethodCompilationStat::kPropagatedIfValue, uses_before - uses_after);
+ }
+}
+
+void HConstantFoldingVisitor::VisitIf(HIf* inst) {
+ // This optimization can take a lot of compile time since we have a lot of If instructions in
+ // graphs.
+ if (!use_all_optimizations_) {
+ return;
+ }
+
+ // Consistency check: the true and false successors do not dominate each other.
+ DCHECK(!inst->IfTrueSuccessor()->Dominates(inst->IfFalseSuccessor()) &&
+ !inst->IfFalseSuccessor()->Dominates(inst->IfTrueSuccessor()));
+
+ HInstruction* if_input = inst->InputAt(0);
+
+ // Already a constant.
+ if (if_input->IsConstant()) {
+ return;
+ }
+
+ // if (variable) {
+ // SSA `variable` guaranteed to be true
+ // } else {
+ // and here false
+ // }
+ PropagateValue(inst->IfTrueSuccessor(), if_input, GetGraph()->GetIntConstant(1));
+ PropagateValue(inst->IfFalseSuccessor(), if_input, GetGraph()->GetIntConstant(0));
+
+ // If the input is a condition, we can propagate the information of the condition itself.
+ if (!if_input->IsCondition()) {
+ return;
+ }
+ HCondition* condition = if_input->AsCondition();
+
+  // We want either `==` or `!=`, since we cannot make assumptions for other conditions, e.g. `>`.
+ if (!condition->IsEqual() && !condition->IsNotEqual()) {
+ return;
+ }
+
+ HInstruction* left = condition->GetLeft();
+ HInstruction* right = condition->GetRight();
+
+ // We want one of them to be a constant and not the other.
+ if (left->IsConstant() == right->IsConstant()) {
+ return;
+ }
+
+ // At this point we have something like:
+ // if (variable == constant) {
+ // SSA `variable` guaranteed to be equal to constant here
+ // } else {
+ // No guarantees can be made here (usually, see boolean case below).
+ // }
+ // Similarly with variable != constant, except that we can make guarantees in the else case.
+
+ HConstant* constant = left->IsConstant() ? left->AsConstant() : right->AsConstant();
+ HInstruction* variable = left->IsConstant() ? right : left;
+
+ // Don't deal with floats/doubles since they bring a lot of edge cases e.g.
+ // if (f == 0.0f) {
+ // // f is not really guaranteed to be 0.0f. It could be -0.0f, for example
+ // }
+ if (DataType::IsFloatingPointType(variable->GetType())) {
+ return;
+ }
+ DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
+
+ // Sometimes we have an HCompare flowing into an Equals/NonEquals, which can act as a proxy. For
+ // example: `Equals(Compare(var, constant), 0)`. This is common for long, float, and double.
+ if (variable->IsCompare()) {
+ // We only care about equality comparisons so we skip if it is a less or greater comparison.
+ if (!constant->IsArithmeticZero()) {
+ return;
+ }
+
+ // Update left and right to be the ones from the HCompare.
+ left = variable->AsCompare()->GetLeft();
+ right = variable->AsCompare()->GetRight();
+
+    // Re-check that one of them is a constant and not the other.
+ if (left->IsConstant() == right->IsConstant()) {
+ return;
+ }
+
+ constant = left->IsConstant() ? left->AsConstant() : right->AsConstant();
+ variable = left->IsConstant() ? right : left;
+
+ // Re-check floating point values.
+ if (DataType::IsFloatingPointType(variable->GetType())) {
+ return;
+ }
+ DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
+ }
+
+ // From this block forward we want to replace the SSA value. We use `starting_block` and not the
+ // `if` block as we want to update one of the branches but not the other.
+ HBasicBlock* starting_block =
+ condition->IsEqual() ? inst->IfTrueSuccessor() : inst->IfFalseSuccessor();
+
+ PropagateValue(starting_block, variable, constant);
+
+ // Special case for booleans since they have only two values so we know what to propagate in the
+ // other branch. However, sometimes our boolean values are not compared to 0 or 1. In those cases
+ // we cannot make an assumption for the `else` branch.
+ if (variable->GetType() == DataType::Type::kBool &&
+ constant->IsIntConstant() &&
+ (constant->AsIntConstant()->IsTrue() || constant->AsIntConstant()->IsFalse())) {
+ HBasicBlock* other_starting_block =
+ condition->IsEqual() ? inst->IfFalseSuccessor() : inst->IfTrueSuccessor();
+ DCHECK_NE(other_starting_block, starting_block);
+
+ HConstant* other_constant = constant->AsIntConstant()->IsTrue() ?
+ GetGraph()->GetIntConstant(0) :
+ GetGraph()->GetIntConstant(1);
+ DCHECK_NE(other_constant, constant);
+ PropagateValue(other_starting_block, variable, other_constant);
+ }
+}
+
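VisitIf above turns a dominance fact into constants: inside the branch guarded by `if (x == c)`, uses of x dominated by that branch can read c instead, an HCompare feeding the equality is looked through, and a boolean compared against true/false also yields the opposite constant in the other branch. A source-level sketch of what the propagation (followed by ordinary folding) achieves; illustrative only, since the pass rewrites SSA uses rather than source text:

// Before: `x` is used inside a branch that guarantees its value.
int Before(int x) {
  if (x == 42) {
    return x + 1;    // here `x` can only be 42
  }
  return x;          // an `==` guard gives no information on this path
}

// What the propagation makes the true branch equivalent to.
int After(int x) {
  if (x == 42) {
    return 43;       // use replaced by the constant, then folded
  }
  return x;
}

// Boolean special case: with only two possible values, both branches learn a constant.
int Flag(bool b) {
  return b ? (b ? 10 : 20)    // true branch: `b` is 1, folds to 10
           : (b ? 10 : 20);   // false branch: `b` is 0, folds to 20
}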
+void HConstantFoldingVisitor::VisitArrayLength(HArrayLength* inst) {
+ HInstruction* input = inst->InputAt(0);
+ if (input->IsLoadString()) {
+ DCHECK(inst->IsStringLength());
+ HLoadString* load_string = input->AsLoadString();
+ const DexFile& dex_file = load_string->GetDexFile();
+ const dex::StringId& string_id = dex_file.GetStringId(load_string->GetStringIndex());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(dex_file.GetStringLength(string_id)));
+ }
+}
+
+void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
+ // Constant folding: replace `TypeConversion(a)' with a constant at
+ // compile time if `a' is a constant.
+ HConstant* constant = inst->TryStaticEvaluation();
+ if (constant != nullptr) {
+ inst->ReplaceWith(constant);
+ inst->GetBlock()->RemoveInstruction(inst);
+ }
+}
void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
@@ -145,8 +316,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instr
}
void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) {
- if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
- (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) {
+    // Replace code looking like
+    //    EQUAL lhs, lhs
+    //    with
+    //    CONSTANT true
+    // We don't perform this optimization for FP types since Double.NaN != Double.NaN, so folding
+    // to `true` would be wrong when lhs is NaN.
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
+ (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
// Replace code looking like
// EQUAL lhs, null
// where lhs cannot be null with
@@ -157,8 +337,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) {
}
void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instruction) {
- if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
- (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) {
+    // Replace code looking like
+    //    NOT_EQUAL lhs, lhs
+    //    with
+    //    CONSTANT false
+    // We don't perform this optimization for FP types since Double.NaN != Double.NaN, so folding
+    // to `false` would be wrong when lhs is NaN.
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
+ (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
// Replace code looking like
// NOT_EQUAL lhs, null
// where lhs cannot be null with
@@ -169,8 +358,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instructi
}
void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
- if (instruction->GetLeft()->IsConstant() &&
- instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+    // Replace code looking like
+    //    ABOVE lhs, lhs
+    //    with
+    //    CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetLeft()->IsConstant() &&
+ instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// ABOVE dst, 0, src // unsigned 0 > src is always false
// with
@@ -181,8 +376,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
}
void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* instruction) {
- if (instruction->GetRight()->IsConstant() &&
- instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+    // Replace code looking like
+    //    ABOVE_OR_EQUAL lhs, lhs
+    //    with
+    //    CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetRight()->IsConstant() &&
+ instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// ABOVE_OR_EQUAL dst, src, 0 // unsigned src >= 0 is always true
// with
@@ -193,8 +394,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* i
}
void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) {
- if (instruction->GetRight()->IsConstant() &&
- instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+    // Replace code looking like
+    //    BELOW lhs, lhs
+    //    with
+    //    CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetRight()->IsConstant() &&
+ instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// BELOW dst, src, 0 // unsigned src < 0 is always false
// with
@@ -205,8 +412,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) {
}
void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* instruction) {
- if (instruction->GetLeft()->IsConstant() &&
- instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+    // Replace code looking like
+    //    BELOW_OR_EQUAL lhs, lhs
+    //    with
+    //    CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetLeft()->IsConstant() &&
+ instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// BELOW_OR_EQUAL dst, 0, src // unsigned 0 <= src is always true
// with
@@ -216,6 +429,55 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* i
}
}
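A minimal sketch (plain C++, not ART code) of the unsigned-comparison identities these
simplifications rely on, including the new self-comparison cases.

#include <cassert>
#include <cstdint>
#include <initializer_list>
int main() {
  for (uint32_t v : {0u, 1u, 0x7fffffffu, 0xffffffffu}) {
    assert(!(0u > v));   // ABOVE 0, src: unsigned 0 > src is always false
    assert(v >= 0u);     // ABOVE_OR_EQUAL src, 0: always true
    assert(!(v < 0u));   // BELOW src, 0: always false
    assert(0u <= v);     // BELOW_OR_EQUAL 0, src: always true
    assert(!(v > v) && v >= v && !(v < v) && v <= v);  // unsigned self-comparisons
  }
  return 0;
}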
+void InstructionWithAbsorbingInputSimplifier::VisitGreaterThan(HGreaterThan* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsLtBias())) {
+    // Replace code looking like
+    //    GREATER_THAN lhs, lhs
+    //    with
+    //    CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitGreaterThanOrEqual(
+ HGreaterThanOrEqual* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsGtBias())) {
+    // Replace code looking like
+    //    GREATER_THAN_OR_EQUAL lhs, lhs
+    //    with
+    //    CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitLessThan(HLessThan* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsGtBias())) {
+    // Replace code looking like
+    //    LESS_THAN lhs, lhs
+    //    with
+    //    CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitLessThanOrEqual(HLessThanOrEqual* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsLtBias())) {
+    // Replace code looking like
+    //    LESS_THAN_OR_EQUAL lhs, lhs
+    //    with
+    //    CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
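A minimal sketch (plain C++, not ART code) of why the floating-point self-comparison folds above
are gated on the compare bias: every ordered comparison involving NaN is false, so a fold is only
safe when a NaN operand would produce the same constant the fold picks.

#include <cassert>
#include <cmath>
int main() {
  double nan = std::nan("");
  assert(!(nan == nan) && (nan != nan));   // why Equal/NotEqual skip FP entirely
  assert(!(nan > nan) && !(nan >= nan));   // ordered comparisons against NaN are all false
  assert(!(nan < nan) && !(nan <= nan));
  double x = 1.0;
  assert(!(x > x) && (x >= x) && !(x < x) && (x <= x));  // non-NaN self-comparisons
  return 0;
}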
void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
DataType::Type type = instruction->GetType();
HConstant* input_cst = instruction->GetConstantRight();
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 72bd95b3cb..29648e907c 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -17,10 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_
#define ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
+#include "optimizing/optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization pass performing a simple constant-expression
@@ -39,13 +41,20 @@ namespace art {
*/
class HConstantFolding : public HOptimization {
public:
- HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {}
+ HConstantFolding(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kConstantFoldingPassName,
+ bool use_all_optimizations = false)
+ : HOptimization(graph, name, stats), use_all_optimizations_(use_all_optimizations) {}
bool Run() override;
static constexpr const char* kConstantFoldingPassName = "constant_folding";
private:
+ // Use all optimizations without restrictions.
+ bool use_all_optimizations_;
+
DISALLOW_COPY_AND_ASSIGN(HConstantFolding);
};
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 74d9d3a993..741fd3f822 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -17,6 +17,8 @@
#include <functional>
#include "constant_folding.h"
+
+#include "base/macros.h"
#include "dead_code_elimination.h"
#include "driver/compiler_options.h"
#include "graph_checker.h"
@@ -25,12 +27,12 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the constant folding and dce tests.
*/
-class ConstantFoldingTest : public OptimizingUnitTest {
+class ConstantFoldingTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
public:
ConstantFoldingTest() : graph_(nullptr) { }
@@ -58,7 +60,9 @@ class ConstantFoldingTest : public OptimizingUnitTest {
std::string actual_before = printer_before.str();
EXPECT_EQ(expected_before, actual_before);
- HConstantFolding(graph_, "constant_folding").Run();
+ HConstantFolding constant_folding(
+ graph_, /* stats= */ nullptr, "constant_folding", /* use_all_optimizations= */ true);
+ constant_folding.Run();
GraphChecker graph_checker_cf(graph_);
graph_checker_cf.Run();
ASSERT_TRUE(graph_checker_cf.IsValid());
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
index 3a1a9e023d..d9b7652f32 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -20,12 +20,12 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
-namespace art {
+namespace art HIDDEN {
static constexpr bool kCfreLogFenceInputCount = false;
// TODO: refactor this code by reusing escape analysis.
-class CFREVisitor : public HGraphVisitor {
+class CFREVisitor final : public HGraphVisitor {
public:
CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats)
: HGraphVisitor(graph),
@@ -147,16 +147,6 @@ class CFREVisitor : public HGraphVisitor {
void VisitAlias(HInstruction* aliasing_inst) {
// An object is considered "published" if it becomes aliased by other instructions.
if (HasInterestingPublishTargetAsInput(aliasing_inst)) {
- // Note that constructing a "NullCheck" for new-instance, new-array,
- // or a 'this' (receiver) reference is impossible.
- //
- // If by some reason we actually encounter such a NullCheck(FenceTarget),
- // we LOG(WARNING).
- if (UNLIKELY(aliasing_inst->IsNullCheck())) {
- LOG(kIsDebugBuild ? FATAL : WARNING)
- << "Unexpected instruction: NullCheck; should not be legal in graph";
- // We then do a best-effort to handle this case.
- }
MergeCandidateFences();
}
}
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h
index 014b342258..e04b986171 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.h
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/*
* Constructor Fence Redundancy Elimination (CFRE).
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.cc b/compiler/optimizing/critical_native_abi_fixup_arm.cc
index 3c4db4bca7..77e156608b 100644
--- a/compiler/optimizing/critical_native_abi_fixup_arm.cc
+++ b/compiler/optimizing/critical_native_abi_fixup_arm.cc
@@ -23,7 +23,7 @@
#include "scoped_thread_state_change-inl.h"
#include "well_known_classes.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
// Fix up FP arguments passed in core registers for call to @CriticalNative by inserting fake calls
@@ -45,9 +45,9 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) {
if (DataType::IsFloatingPointType(input_type)) {
bool is_double = (input_type == DataType::Type::kFloat64);
DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32;
- jmethodID known_method = is_double ? WellKnownClasses::java_lang_Double_doubleToRawLongBits
- : WellKnownClasses::java_lang_Float_floatToRawIntBits;
- ArtMethod* resolved_method = jni::DecodeArtMethod(known_method);
+ ArtMethod* resolved_method = is_double
+ ? WellKnownClasses::java_lang_Double_doubleToRawLongBits
+ : WellKnownClasses::java_lang_Float_floatToRawIntBits;
DCHECK(resolved_method != nullptr);
DCHECK(resolved_method->IsIntrinsic());
MethodReference target_method(nullptr, 0);
@@ -74,7 +74,8 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) {
dispatch_info,
kStatic,
target_method,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !block->GetGraph()->IsDebuggable());
// The intrinsic has no side effects and does not need environment or dex cache on ARM.
new_input->SetSideEffects(SideEffects::None());
IntrinsicOptimizations opt(new_input);
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.h b/compiler/optimizing/critical_native_abi_fixup_arm.h
index faa3c7a5fe..c2068f5e2d 100644
--- a/compiler/optimizing/critical_native_abi_fixup_arm.h
+++ b/compiler/optimizing/critical_native_abi_fixup_arm.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_
#define ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class CriticalNativeAbiFixupArm : public HOptimization {
diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h
index 1b33b775da..bbfe90451b 100644
--- a/compiler/optimizing/data_type-inl.h
+++ b/compiler/optimizing/data_type-inl.h
@@ -20,7 +20,7 @@
#include "data_type.h"
#include "dex/primitive.h"
-namespace art {
+namespace art HIDDEN {
// Note: Not declared in data_type.h to avoid pulling in "primitive.h".
constexpr DataType::Type DataTypeFromPrimitive(Primitive::Type type) {
diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc
index cb354f46cc..183cf2c622 100644
--- a/compiler/optimizing/data_type.cc
+++ b/compiler/optimizing/data_type.cc
@@ -16,7 +16,7 @@
#include "data_type.h"
-namespace art {
+namespace art HIDDEN {
static const char* kTypeNames[] = {
"Reference",
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index ec6ca7accb..b6d9519150 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -22,8 +22,9 @@
#include <android-base/logging.h>
#include "base/bit_utils.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
class DataType {
public:
diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc
index 8fea22bce8..f6f614d8c4 100644
--- a/compiler/optimizing/data_type_test.cc
+++ b/compiler/optimizing/data_type_test.cc
@@ -22,7 +22,7 @@
#include "base/macros.h"
#include "dex/primitive.h"
-namespace art {
+namespace art HIDDEN {
template <DataType::Type data_type, Primitive::Type primitive_type>
static void CheckConversion() {
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index d808f2ca3a..cf49e39849 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -16,14 +16,17 @@
#include "dead_code_elimination.h"
+#include "android-base/logging.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
+#include "base/logging.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
+#include "optimizing/nodes.h"
#include "ssa_phi_elimination.h"
-namespace art {
+namespace art HIDDEN {
static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) {
// Use local allocator for allocating memory.
@@ -178,6 +181,13 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
} else if (!cond->InputAt(0)->IsNullConstant()) {
return false;
}
+
+ // We can't create a BoundType for an object with an invalid RTI.
+ const ReferenceTypeInfo ti = obj->GetReferenceTypeInfo();
+ if (!ti.IsValid()) {
+ return false;
+ }
+
// Scan all uses of obj and find null check under control dependence.
HBoundType* bound = nullptr;
const HUseList<HInstruction*>& uses = obj->GetUses();
@@ -190,7 +200,6 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
user_block != throws &&
block->Dominates(user_block)) {
if (bound == nullptr) {
- ReferenceTypeInfo ti = obj->GetReferenceTypeInfo();
bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj);
bound->SetUpperBound(ti, /*can_be_null*/ false);
bound->SetReferenceTypeInfo(ti);
@@ -213,6 +222,9 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
// | ...
// | instr_n
// | foo() // always throws
+// | instr_n+2
+// | ...
+// | instr_n+m
// \ goto B2
// \ /
// B2
@@ -230,11 +242,14 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
// B2 Exit
//
// Rationale:
-// Removal of the never taken edge to B2 may expose
-// other optimization opportunities, such as code sinking.
+// Removal of the never taken edge to B2 may expose other optimization opportunities, such as code
+// sinking.
+//
+// Note: the example above ends the block with a `goto` for simplicity, but the block could just
+// as well end with an If or another control-flow instruction.
bool HDeadCodeElimination::SimplifyAlwaysThrows() {
HBasicBlock* exit = graph_->GetExitBlock();
- if (exit == nullptr) {
+ if (!graph_->HasAlwaysThrowingInvokes() || exit == nullptr) {
return false;
}
@@ -242,54 +257,55 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() {
// Order does not matter, just pick one.
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- if (block->GetTryCatchInformation() != nullptr) {
+ if (block->IsTryBlock()) {
// We don't want to perform the simplify always throws optimizations for throws inside of
- // tries since those throws might not go to the exit block. We do that by checking the
- // TryCatchInformation of the blocks.
- //
- // As a special case the `catch_block` is the first block of the catch and it has
- // TryCatchInformation. Other blocks in the catch don't have try catch information (as long as
- // they are not part of an outer try). Knowing if a `catch_block` is part of an outer try is
- // possible by checking its successors, but other restrictions of the simplify always throws
- // optimization will block `catch_block` nevertheless (e.g. only one predecessor) so it is not
- // worth the effort.
-
- // TODO(solanes): Maybe we can do a `goto catch` if inside of a try catch instead of going to
- // the exit. If we do so, we have to take into account that we should go to the nearest valid
- // catch i.e. one that would accept our exception type.
+ // tries since those throws might not go to the exit block.
continue;
}
- HInstruction* last = block->GetLastInstruction();
- HInstruction* prev = last->GetPrevious();
- if (prev == nullptr) {
- DCHECK_EQ(block->GetFirstInstruction(), block->GetLastInstruction());
+ // We iterate to find the first instruction that always throws. If two instructions always
+ // throw, the first one will throw and the second one will never be reached.
+ HInstruction* throwing_invoke = nullptr;
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (it.Current()->IsInvoke() && it.Current()->AsInvoke()->AlwaysThrows()) {
+ throwing_invoke = it.Current();
+ break;
+ }
+ }
+
+ if (throwing_invoke == nullptr) {
+ // No always-throwing instruction found. Continue with the rest of the blocks.
continue;
}
- if (prev->AlwaysThrows() &&
- last->IsGoto() &&
- block->GetPhis().IsEmpty() &&
- block->GetPredecessors().size() == 1u) {
- HBasicBlock* pred = block->GetSinglePredecessor();
- HBasicBlock* succ = block->GetSingleSuccessor();
- // Ensure no computations are merged through throwing block.
- // This does not prevent the optimization per se, but would
- // require an elaborate clean up of the SSA graph.
- if (succ != exit &&
- !block->Dominates(pred) &&
- pred->Dominates(succ) &&
- succ->GetPredecessors().size() > 1u &&
- succ->GetPhis().IsEmpty()) {
- block->ReplaceSuccessor(succ, exit);
- rerun_dominance_and_loop_analysis = true;
- MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
- // Perform a quick follow up optimization on object != null control dependences
- // that is much cheaper to perform now than in a later phase.
- if (RemoveNonNullControlDependences(pred, block)) {
- MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck);
- }
- }
+    // Even if the block already jumps to the exit block, we could still remove the instructions
+    // between the always-throwing instruction and the end of the block. If there are no such
+    // instructions, just continue since there's nothing to do.
+ if (block->GetSuccessors().size() == 1 &&
+ block->GetSingleSuccessor() == exit &&
+ block->GetLastInstruction()->GetPrevious() == throwing_invoke) {
+ continue;
+ }
+
+    // We split the block at the throwing instruction. The instructions after it will be
+    // disconnected from the graph once `block` points to the exit, and `RemoveDeadBlocks` will
+    // take care of removing the new block and its instructions.
+    // Even though `SplitBefore` doesn't guarantee that the graph remains in SSA form, it is fine
+    // here since we do not break it.
+ HBasicBlock* new_block = block->SplitBefore(throwing_invoke->GetNext(),
+ /* require_graph_not_in_ssa_form= */ false);
+ DCHECK_EQ(block->GetSingleSuccessor(), new_block);
+ block->ReplaceSuccessor(new_block, exit);
+
+ rerun_dominance_and_loop_analysis = true;
+ MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
+ // Perform a quick follow up optimization on object != null control dependences
+ // that is much cheaper to perform now than in a later phase.
+ // If there are multiple predecessors, none may end with a HIf as required in
+ // RemoveNonNullControlDependences because we split critical edges.
+ if (block->GetPredecessors().size() == 1u &&
+ RemoveNonNullControlDependences(block->GetSinglePredecessor(), block)) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck);
}
}
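A minimal sketch (plain C++, not ART code or dex) of the shape SimplifyAlwaysThrows now handles:
the block is split right after the always-throwing call and rewired to the exit, so the trailing
instructions become a dead block.

#include <cassert>
[[noreturn]] static void always_throws() { throw 42; }
static int example(int x) {
  if (x < 0) {
    always_throws();  // the block is split right after this call ...
    x += 10;          // ... so these instructions land in a block that is now dead
    x *= 2;
  }
  return x;
}
int main() {
  assert(example(1) == 1);
  try {
    example(-1);
    assert(false);  // not reached: example(-1) always throws
  } catch (int) {
  }
  return 0;
}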
@@ -303,54 +319,45 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() {
return false;
}
-// Simplify the pattern:
-//
-// B1 B2 ...
-// goto goto goto
-// \ | /
-// \ | /
-// B3
-// i1 = phi(input, input)
-// (i2 = condition on i1)
-// if i1 (or i2)
-// / \
-// / \
-// B4 B5
-//
-// Into:
-//
-// B1 B2 ...
-// | | |
-// B4 B5 B?
-//
-// Note that individual edges can be redirected (for example B2->B3
-// can be redirected as B2->B5) without applying this optimization
-// to other incoming edges.
-//
-// This simplification cannot be applied to catch blocks, because
-// exception handler edges do not represent normal control flow.
-// Though in theory this could still apply to normal control flow
-// going directly to a catch block, we cannot support it at the
-// moment because the catch Phi's inputs do not correspond to the
-// catch block's predecessors, so we cannot identify which
-// predecessor corresponds to a given statically evaluated input.
-//
-// We do not apply this optimization to loop headers as this could
-// create irreducible loops. We rely on the suspend check in the
-// loop header to prevent the pattern match.
-//
-// Note that we rely on the dead code elimination to get rid of B3.
bool HDeadCodeElimination::SimplifyIfs() {
bool simplified_one_or_more_ifs = false;
bool rerun_dominance_and_loop_analysis = false;
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+  // Iterating in post order is better for MaybeAddPhi, as it can add a Phi for multiple If
+  // instructions in a chain without having to update the dominator chain. The branch redirection
+  // itself works in either post order or reverse post order.
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (block->IsCatchBlock()) {
+ // This simplification cannot be applied to catch blocks, because exception handler edges do
+ // not represent normal control flow. Though in theory this could still apply to normal
+ // control flow going directly to a catch block, we cannot support it at the moment because
+ // the catch Phi's inputs do not correspond to the catch block's predecessors, so we cannot
+ // identify which predecessor corresponds to a given statically evaluated input.
+ continue;
+ }
+
HInstruction* last = block->GetLastInstruction();
- HInstruction* first = block->GetFirstInstruction();
- if (!block->IsCatchBlock() &&
- last->IsIf() &&
- block->HasSinglePhi() &&
+ if (!last->IsIf()) {
+ continue;
+ }
+
+ if (block->IsLoopHeader()) {
+ // We do not apply this optimization to loop headers as this could create irreducible loops.
+ continue;
+ }
+
+    // If possible, add a Phi that allows the simplification to take place in cases where it
+    // otherwise wouldn't.
+ MaybeAddPhi(block);
+
+ // TODO(solanes): Investigate support for multiple phis in `block`. We can potentially "push
+ // downwards" existing Phis into the true/false branches. For example, let's say we have another
+ // Phi: Phi(x1,x2,x3,x4,x5,x6). This could turn into Phi(x1,x2) in the true branch, Phi(x3,x4)
+ // in the false branch, and remain as Phi(x5,x6) in `block` (for edges that we couldn't
+ // redirect). We might even be able to remove some phis altogether as they will have only one
+ // value.
+ if (block->HasSinglePhi() &&
block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
+ HInstruction* first = block->GetFirstInstruction();
bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
bool has_only_phi_condition_and_if =
!has_only_phi_and_if &&
@@ -361,7 +368,6 @@ bool HDeadCodeElimination::SimplifyIfs() {
first->HasOnlyOneNonEnvironmentUse();
if (has_only_phi_and_if || has_only_phi_condition_and_if) {
- DCHECK(!block->IsLoopHeader());
HPhi* phi = block->GetFirstPhi()->AsPhi();
bool phi_input_is_left = (first->InputAt(0) == phi);
@@ -446,6 +452,125 @@ bool HDeadCodeElimination::SimplifyIfs() {
return simplified_one_or_more_ifs;
}
+void HDeadCodeElimination::MaybeAddPhi(HBasicBlock* block) {
+ DCHECK(block->GetLastInstruction()->IsIf());
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ if (if_instruction->InputAt(0)->IsConstant()) {
+ // Constant values are handled in RemoveDeadBlocks.
+ return;
+ }
+
+ if (block->GetNumberOfPredecessors() < 2u) {
+ // Nothing to redirect.
+ return;
+ }
+
+ if (!block->GetPhis().IsEmpty()) {
+    // SimplifyIfs doesn't currently work with multiple phis. Adding a phi here won't help that
+    // optimization.
+ return;
+ }
+
+ HBasicBlock* dominator = block->GetDominator();
+ if (!dominator->EndsWithIf()) {
+ return;
+ }
+
+ HInstruction* input = if_instruction->InputAt(0);
+ HInstruction* dominator_input = dominator->GetLastInstruction()->AsIf()->InputAt(0);
+ const bool same_input = dominator_input == input;
+ if (!same_input) {
+ // Try to see if the dominator has the opposite input (e.g. if(cond) and if(!cond)). If that's
+ // the case, we can perform the optimization with the false and true branches reversed.
+ if (!dominator_input->IsCondition() || !input->IsCondition()) {
+ return;
+ }
+
+ HCondition* block_cond = input->AsCondition();
+ HCondition* dominator_cond = dominator_input->AsCondition();
+
+ if (block_cond->GetLeft() != dominator_cond->GetLeft() ||
+ block_cond->GetRight() != dominator_cond->GetRight() ||
+ block_cond->GetOppositeCondition() != dominator_cond->GetCondition()) {
+ return;
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // `block`'s successors should have only one predecessor. Otherwise, we have a critical edge in
+ // the graph.
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ DCHECK_EQ(succ->GetNumberOfPredecessors(), 1u);
+ }
+ }
+
+ const size_t pred_size = block->GetNumberOfPredecessors();
+ HPhi* new_phi = new (graph_->GetAllocator())
+ HPhi(graph_->GetAllocator(), kNoRegNumber, pred_size, DataType::Type::kInt32);
+
+ for (size_t index = 0; index < pred_size; index++) {
+ HBasicBlock* pred = block->GetPredecessors()[index];
+ const bool dominated_by_true =
+ dominator->GetLastInstruction()->AsIf()->IfTrueSuccessor()->Dominates(pred);
+ const bool dominated_by_false =
+ dominator->GetLastInstruction()->AsIf()->IfFalseSuccessor()->Dominates(pred);
+ if (dominated_by_true == dominated_by_false) {
+ // In this case, we can't know if we are coming from the true branch, or the false branch. It
+ // happens in cases like:
+ // 1 (outer if)
+ // / \
+ // 2 3 (inner if)
+ // | / \
+ // | 4 5
+ // \/ |
+ // 6 |
+ // \ |
+ // 7 (has the same if(cond) as 1)
+ // |
+ // 8
+ // `7` (which would be `block` in this example), and `6` will come from both the true path and
+ // the false path of `1`. We bumped into something similar in SelectGenerator. See
+ // HSelectGenerator::TryFixupDoubleDiamondPattern.
+ // TODO(solanes): Figure out if we can fix up the graph into a double diamond in a generic way
+ // so that DeadCodeElimination and SelectGenerator can take advantage of it.
+
+ if (!same_input) {
+ // `1` and `7` having the opposite condition is a case we are missing. We could potentially
+ // add a BooleanNot instruction to be able to add the Phi, but it seems like overkill since
+ // this case is not that common.
+ return;
+ }
+
+ // The Phi will have `0`, `1`, and `cond` as inputs. If SimplifyIf redirects 0s and 1s, we
+ // will end up with Phi(cond,...,cond) which will be replaced by `cond`. Effectively, we will
+ // redirect edges that we are able to redirect and the rest will remain as before (i.e. we
+ // won't have an extra Phi).
+ new_phi->SetRawInputAt(index, input);
+ } else {
+ // Redirect to either the true branch (1), or the false branch (0).
+ // Given that `dominated_by_true` is the exact opposite of `dominated_by_false`,
+ // `(same_input && dominated_by_true) || (!same_input && dominated_by_false)` is equivalent to
+ // `same_input == dominated_by_true`.
+ new_phi->SetRawInputAt(
+ index,
+ same_input == dominated_by_true ? graph_->GetIntConstant(1) : graph_->GetIntConstant(0));
+ }
+ }
+
+ block->AddPhi(new_phi);
+ if_instruction->ReplaceInput(new_phi, 0);
+
+  // Remove the old input now, if possible. This allows the branch redirection in SimplifyIfs to
+  // work without waiting for another pass of DCE.
+ if (input->IsDeadAndRemovable()) {
+ DCHECK(!same_input)
+ << " if both blocks have the same condition, it shouldn't be dead and removable since the "
+ << "dominator block's If instruction would be using that condition.";
+ input->GetBlock()->RemoveInstruction(input);
+ }
+ MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyIfAddedPhi);
+}
+
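A minimal sketch (plain C++, not ART code) of the pattern MaybeAddPhi enables SimplifyIfs to
handle: a second `if` on the same condition as its dominator, which can be rewritten to test
Phi(1, 0) and then folded so each predecessor jumps straight to the matching branch.

#include <cassert>
static int example(bool cond, int a, int b) {
  int r;
  if (cond) {      // dominating if (cond)
    r = a;
  } else {
    r = b;
  }
  if (cond) {      // same condition: can become if (Phi(1, 0)) and then fold away
    return r + 1;  // only reachable from the `r = a` path
  }
  return r - 1;    // only reachable from the `r = b` path
}
int main() {
  assert(example(true, 1, 2) == 2);
  assert(example(false, 1, 2) == 1);
  return 0;
}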
void HDeadCodeElimination::ConnectSuccessiveBlocks() {
// Order does not matter. Skip the entry block by starting at index 1 in reverse post order.
for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
@@ -466,7 +591,192 @@ void HDeadCodeElimination::ConnectSuccessiveBlocks() {
}
}
-bool HDeadCodeElimination::RemoveDeadBlocks() {
+struct HDeadCodeElimination::TryBelongingInformation {
+ explicit TryBelongingInformation(ScopedArenaAllocator* allocator)
+ : blocks_in_try(allocator->Adapter(kArenaAllocDCE)),
+ coalesced_try_entries(allocator->Adapter(kArenaAllocDCE)) {}
+
+ // Which blocks belong in the try.
+ ScopedArenaSet<HBasicBlock*> blocks_in_try;
+ // Which other try entries are referencing this same try.
+ ScopedArenaSet<HBasicBlock*> coalesced_try_entries;
+};
+
+bool HDeadCodeElimination::CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info) {
+ for (HBasicBlock* block : try_belonging_info.blocks_in_try) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (it.Current()->CanThrow()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void HDeadCodeElimination::DisconnectHandlersAndUpdateTryBoundary(
+ HBasicBlock* block,
+ /* out */ bool* any_block_in_loop) {
+ if (block->IsInLoop()) {
+ *any_block_in_loop = true;
+ }
+
+ // Disconnect the handlers.
+ while (block->GetSuccessors().size() > 1) {
+ HBasicBlock* handler = block->GetSuccessors()[1];
+ DCHECK(handler->IsCatchBlock());
+ block->RemoveSuccessor(handler);
+ handler->RemovePredecessor(block);
+ if (handler->IsInLoop()) {
+ *any_block_in_loop = true;
+ }
+ }
+
+ // Change TryBoundary to Goto.
+ DCHECK(block->EndsWithTryBoundary());
+ HInstruction* last = block->GetLastInstruction();
+ block->RemoveInstruction(last);
+ block->AddInstruction(new (graph_->GetAllocator()) HGoto(last->GetDexPc()));
+ DCHECK_EQ(block->GetSuccessors().size(), 1u);
+}
+
+void HDeadCodeElimination::RemoveTry(HBasicBlock* try_entry,
+ const TryBelongingInformation& try_belonging_info,
+ /* out */ bool* any_block_in_loop) {
+ // Update all try entries.
+ DCHECK(try_entry->EndsWithTryBoundary());
+ DCHECK(try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry());
+ DisconnectHandlersAndUpdateTryBoundary(try_entry, any_block_in_loop);
+
+ for (HBasicBlock* other_try_entry : try_belonging_info.coalesced_try_entries) {
+ DCHECK(other_try_entry->EndsWithTryBoundary());
+ DCHECK(other_try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry());
+ DisconnectHandlersAndUpdateTryBoundary(other_try_entry, any_block_in_loop);
+ }
+
+ // Update the blocks in the try.
+ for (HBasicBlock* block : try_belonging_info.blocks_in_try) {
+ // Update the try catch information since now the try doesn't exist.
+ block->SetTryCatchInformation(nullptr);
+ if (block->IsInLoop()) {
+ *any_block_in_loop = true;
+ }
+
+ if (block->EndsWithTryBoundary()) {
+ // Try exits.
+ DCHECK(!block->GetLastInstruction()->AsTryBoundary()->IsEntry());
+ DisconnectHandlersAndUpdateTryBoundary(block, any_block_in_loop);
+
+ if (block->GetSingleSuccessor()->IsExitBlock()) {
+        // `block` used to contain only a TryBoundary of kind `exit` that got turned into a Goto.
+        // It now points directly to the exit block, which we don't allow. To fix it, we
+        // disconnect `block` from its predecessor and let RemoveDeadBlocks remove it from the
+        // graph.
+ DCHECK(block->IsSingleGoto());
+ HBasicBlock* predecessor = block->GetSinglePredecessor();
+ predecessor->ReplaceSuccessor(block, graph_->GetExitBlock());
+
+ if (!block->GetDominatedBlocks().empty()) {
+ // Update domination tree if `block` dominates a block to keep the graph consistent.
+ DCHECK_EQ(block->GetDominatedBlocks().size(), 1u);
+ DCHECK_EQ(graph_->GetExitBlock()->GetDominator(), block);
+ predecessor->AddDominatedBlock(graph_->GetExitBlock());
+ graph_->GetExitBlock()->SetDominator(predecessor);
+ block->RemoveDominatedBlock(graph_->GetExitBlock());
+ }
+ }
+ }
+ }
+}
+
+bool HDeadCodeElimination::RemoveUnneededTries() {
+ if (!graph_->HasTryCatch()) {
+ return false;
+ }
+
+ // Use local allocator for allocating memory.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+
+ // Collect which blocks are part of which try.
+ std::unordered_map<HBasicBlock*, TryBelongingInformation> tries;
+ for (HBasicBlock* block : graph_->GetReversePostOrderSkipEntryBlock()) {
+ if (block->IsTryBlock()) {
+ HBasicBlock* key = block->GetTryCatchInformation()->GetTryEntry().GetBlock();
+ auto it = tries.find(key);
+ if (it == tries.end()) {
+ it = tries.insert({key, TryBelongingInformation(&allocator)}).first;
+ }
+ it->second.blocks_in_try.insert(block);
+ }
+ }
+
+  // Deduplicate tries that have different try entries but are really the same try.
+ for (auto it = tries.begin(); it != tries.end(); it++) {
+ DCHECK(it->first->EndsWithTryBoundary());
+ HTryBoundary* try_boundary = it->first->GetLastInstruction()->AsTryBoundary();
+ for (auto other_it = next(it); other_it != tries.end(); /*other_it++ in the loop*/) {
+ DCHECK(other_it->first->EndsWithTryBoundary());
+ HTryBoundary* other_try_boundary = other_it->first->GetLastInstruction()->AsTryBoundary();
+ if (try_boundary->HasSameExceptionHandlersAs(*other_try_boundary)) {
+ // Merge the entries as they are really the same one.
+ // Block merging.
+ it->second.blocks_in_try.insert(other_it->second.blocks_in_try.begin(),
+ other_it->second.blocks_in_try.end());
+
+ // Add the coalesced try entry to update it too.
+ it->second.coalesced_try_entries.insert(other_it->first);
+
+ // Erase the other entry.
+ other_it = tries.erase(other_it);
+ } else {
+ other_it++;
+ }
+ }
+ }
+
+ size_t removed_tries = 0;
+ bool any_block_in_loop = false;
+
+ // Check which tries contain throwing instructions.
+ for (const auto& entry : tries) {
+ if (CanPerformTryRemoval(entry.second)) {
+ ++removed_tries;
+ RemoveTry(entry.first, entry.second, &any_block_in_loop);
+ }
+ }
+
+ if (removed_tries != 0) {
+ // We want to:
+ // 1) Update the dominance information
+ // 2) Remove catch block subtrees, if they are now unreachable.
+ // If we run the dominance recomputation without removing the code, those catch blocks will
+ // not be part of the post order and won't be removed. If we don't run the dominance
+ // recomputation, we risk RemoveDeadBlocks not running it and leaving the graph in an
+ // inconsistent state. So, what we can do is run RemoveDeadBlocks and force a recomputation.
+ // Note that we are not guaranteed to remove a catch block if we have nested try blocks:
+ //
+ // try {
+ // ... nothing can throw. TryBoundary A ...
+ // try {
+ // ... can throw. TryBoundary B...
+ // } catch (Error e) {}
+ // } catch (Exception e) {}
+ //
+ // In the example above, we can remove the TryBoundary A but the Exception catch cannot be
+ // removed as the TryBoundary B might still throw into that catch. TryBoundary A and B don't get
+ // coalesced since they have different catch handlers.
+
+ RemoveDeadBlocks(/* force_recomputation= */ true, any_block_in_loop);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedTry, removed_tries);
+ return true;
+ } else {
+ return false;
+ }
+}
+
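A minimal sketch (plain C++ rather than Java/dex, not ART code) of the shape RemoveUnneededTries
targets: a try region whose body cannot throw, so its handlers are unreachable and the try
boundaries can be rewritten into plain gotos.

#include <cassert>
static int cannot_throw(int x) noexcept { return x * 2 + 1; }
static int example(int x) {
  try {            // nothing in this region can throw ...
    return cannot_throw(x);
  } catch (...) {  // ... so the handler is unreachable and the try scaffolding is removable
    return -1;
  }
}
int main() {
  assert(example(20) == 41);
  return 0;
}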
+bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation,
+ bool force_loop_recomputation) {
+ DCHECK_IMPLIES(force_loop_recomputation, force_recomputation);
+
// Use local allocator for allocating memory.
ScopedArenaAllocator allocator(graph_->GetArenaStack());
@@ -495,8 +805,8 @@ bool HDeadCodeElimination::RemoveDeadBlocks() {
// If we removed at least one block, we need to recompute the full
// dominator tree and try block membership.
- if (removed_one_or_more_blocks) {
- if (rerun_dominance_and_loop_analysis) {
+ if (removed_one_or_more_blocks || force_recomputation) {
+ if (rerun_dominance_and_loop_analysis || force_loop_recomputation) {
graph_->ClearLoopInformation();
graph_->ClearDominanceInformation();
graph_->BuildDominatorTree();
@@ -530,6 +840,33 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
}
}
+void HDeadCodeElimination::UpdateGraphFlags() {
+ bool has_monitor_operations = false;
+ bool has_simd = false;
+ bool has_bounds_checks = false;
+ bool has_always_throwing_invokes = false;
+
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsMonitorOperation()) {
+ has_monitor_operations = true;
+ } else if (instruction->IsVecOperation()) {
+ has_simd = true;
+ } else if (instruction->IsBoundsCheck()) {
+ has_bounds_checks = true;
+ } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) {
+ has_always_throwing_invokes = true;
+ }
+ }
+ }
+
+ graph_->SetHasMonitorOperations(has_monitor_operations);
+ graph_->SetHasSIMD(has_simd);
+ graph_->SetHasBoundsChecks(has_bounds_checks);
+ graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes);
+}
+
bool HDeadCodeElimination::Run() {
// Do not eliminate dead blocks if the graph has irreducible loops. We could
// support it, but that would require changes in our loop representation to handle
@@ -541,6 +878,11 @@ bool HDeadCodeElimination::Run() {
did_any_simplification |= SimplifyAlwaysThrows();
did_any_simplification |= SimplifyIfs();
did_any_simplification |= RemoveDeadBlocks();
+  // We call RemoveDeadBlocks before RemoveUnneededTries to remove the dead blocks left by the
+  // previous optimizations. Otherwise, we might detect that a try has throwing instructions when
+  // they are actually dead code. RemoveUnneededTries will call RemoveDeadBlocks again if needed.
+ did_any_simplification |= RemoveUnneededTries();
if (did_any_simplification) {
// Connect successive blocks created by dead branches.
ConnectSuccessiveBlocks();
@@ -548,6 +890,7 @@ bool HDeadCodeElimination::Run() {
}
SsaRedundantPhiElimination(graph_).Run();
RemoveDeadInstructions();
+ UpdateGraphFlags();
return true;
}
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 799721acf2..ddd01f7103 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -17,11 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
#include "optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization pass performing dead code elimination (removal of
@@ -39,11 +40,87 @@ class HDeadCodeElimination : public HOptimization {
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
void MaybeRecordSimplifyIf();
- bool RemoveDeadBlocks();
+ // If `force_recomputation` is true, we will recompute the dominance information even when we
+ // didn't delete any blocks. `force_loop_recomputation` is similar but it also forces the loop
+ // information recomputation.
+ bool RemoveDeadBlocks(bool force_recomputation = false, bool force_loop_recomputation = false);
void RemoveDeadInstructions();
bool SimplifyAlwaysThrows();
+ // Simplify the pattern:
+ //
+ // B1 B2 ...
+ // goto goto goto
+ // \ | /
+ // \ | /
+ // B3
+ // i1 = phi(input, input)
+ // (i2 = condition on i1)
+ // if i1 (or i2)
+ // / \
+ // / \
+ // B4 B5
+ //
+ // Into:
+ //
+ // B1 B2 ...
+ // | | |
+ // B4 B5 B?
+ //
+ // Note that individual edges can be redirected (for example B2->B3
+ // can be redirected as B2->B5) without applying this optimization
+ // to other incoming edges.
+ //
+ // Note that we rely on the dead code elimination to get rid of B3.
bool SimplifyIfs();
void ConnectSuccessiveBlocks();
+  // Updates the graph flags related to instructions (e.g. HasSIMD()) since we may have eliminated
+  // the relevant instructions. There's no need to update `HasTryCatch` since we do that in
+  // `ComputeTryBlockInformation`. Similarly with `HasLoops` and `HasIrreducibleLoops`: they are
+  // cleared in `ClearLoopInformation` and then set to true as part of `HLoopInformation::Populate`,
+  // if needed.
+ void UpdateGraphFlags();
+
+ // Helper struct to eliminate tries.
+ struct TryBelongingInformation;
+  // Disconnects `block`'s handlers and updates its `TryBoundary` instruction to a `Goto`.
+  // Sets `any_block_in_loop` to true if any affected block is currently in a loop, so that the
+  // loop information can be updated later if needed.
+ void DisconnectHandlersAndUpdateTryBoundary(HBasicBlock* block,
+ /* out */ bool* any_block_in_loop);
+ // Returns true iff the try doesn't contain throwing instructions.
+ bool CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info);
+ // Removes the try by disconnecting all try entries and exits from their handlers. Also updates
+ // the graph in the case that a `TryBoundary` instruction of kind `exit` has the Exit block as
+ // its successor.
+ void RemoveTry(HBasicBlock* try_entry,
+ const TryBelongingInformation& try_belonging_info,
+ bool* any_block_in_loop);
+ // Checks which tries (if any) are currently in the graph, coalesces the different try entries
+ // that are referencing the same try, and removes the tries which don't contain any throwing
+ // instructions.
+ bool RemoveUnneededTries();
+
+ // Adds a phi in `block`, if `block` and its dominator have the same (or opposite) condition.
+ // For example it turns:
+ // if(cond)
+ // / \
+ // B1 B2
+ // \ /
+ // if(cond)
+ // / \
+ // B3 B4
+ //
+ // into:
+ // if(cond)
+ // / \
+ // B1 B2
+ // \ /
+ // if(Phi(1, 0))
+ // / \
+ // B3 B4
+ //
+ // Following this, SimplifyIfs is able to connect B1->B3 and B2->B4 effectively skipping an if.
+ void MaybeAddPhi(HBasicBlock* block);
DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
};
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index f5cd4dc27a..b789434add 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -16,6 +16,7 @@
#include "dead_code_elimination.h"
+#include "base/macros.h"
#include "driver/compiler_options.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
@@ -23,9 +24,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class DeadCodeEliminationTest : public OptimizingUnitTest {
+class DeadCodeEliminationTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data,
const std::string& expected_before,
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 1d72ba116e..5f366ebcd9 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_instruction.h"
#include "nodes.h"
@@ -22,9 +23,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class OptimizerTest : public OptimizingUnitTest {
+class OptimizerTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length);
};
diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc
index 617833c697..cebe94fd0d 100644
--- a/compiler/optimizing/escape.cc
+++ b/compiler/optimizing/escape.cc
@@ -18,7 +18,7 @@
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void VisitEscapes(HInstruction* reference, EscapeVisitor& escape_visitor) {
// References not allocated in the method are intrinsically escaped.
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h
index 5402cb1763..3b284fbf43 100644
--- a/compiler/optimizing/escape.h
+++ b/compiler/optimizing/escape.h
@@ -17,7 +17,9 @@
#ifndef ART_COMPILER_OPTIMIZING_ESCAPE_H_
#define ART_COMPILER_OPTIMIZING_ESCAPE_H_
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HInstruction;
diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc
index 66fdfcda5b..06aabbe040 100644
--- a/compiler/optimizing/execution_subgraph.cc
+++ b/compiler/optimizing/execution_subgraph.cc
@@ -26,7 +26,7 @@
#include "base/scoped_arena_allocator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator)
: graph_(graph),
diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h
index 7d2a66077d..5ddf17de60 100644
--- a/compiler/optimizing/execution_subgraph.h
+++ b/compiler/optimizing/execution_subgraph.h
@@ -27,6 +27,7 @@
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
@@ -34,7 +35,7 @@
#include "base/transform_iterator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc
index 74c243b5b4..921ef056ba 100644
--- a/compiler/optimizing/execution_subgraph_test.cc
+++ b/compiler/optimizing/execution_subgraph_test.cc
@@ -37,7 +37,7 @@
#include "optimizing_unit_test.h"
#include "scoped_thread_state_change.h"
-namespace art {
+namespace art HIDDEN {
using BlockSet = std::unordered_set<const HBasicBlock*>;
diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h
index 13cb2bc7c5..cee105a045 100644
--- a/compiler/optimizing/execution_subgraph_test.h
+++ b/compiler/optimizing/execution_subgraph_test.h
@@ -19,7 +19,9 @@
#include "android-base/macros.h"
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HGraph;
class ExecutionSubgraph;
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 75b8e9609e..8857b2a775 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
@@ -25,9 +26,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class FindLoopsTest : public OptimizingUnitTest {};
+class FindLoopsTest : public CommonCompilerTest, public OptimizingUnitTestHelper {};
TEST_F(FindLoopsTest, CFG1) {
// Constant is not used.
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index d1769cea0d..190b362145 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -32,7 +32,7 @@
#include "scoped_thread_state_change-inl.h"
#include "subtype_check.h"
-namespace art {
+namespace art HIDDEN {
using android::base::StringPrintf;
@@ -80,9 +80,91 @@ size_t GraphChecker::Run(bool pass_change, size_t last_size) {
// as the latter might visit dead blocks removed by the dominator
// computation.
VisitReversePostOrder();
+ CheckGraphFlags();
return current_size;
}
+void GraphChecker::VisitReversePostOrder() {
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
+ if (block->IsInLoop()) {
+ flag_info_.seen_loop = true;
+ if (block->GetLoopInformation()->IsIrreducible()) {
+ flag_info_.seen_irreducible_loop = true;
+ }
+ }
+
+ VisitBasicBlock(block);
+ }
+}
+
+static const char* StrBool(bool val) {
+ return val ? "true" : "false";
+}
+
+void GraphChecker::CheckGraphFlags() {
+ if (GetGraph()->HasMonitorOperations() != flag_info_.seen_monitor_operation) {
+ AddError(
+ StringPrintf("Flag mismatch: HasMonitorOperations() (%s) should be equal to "
+ "flag_info_.seen_monitor_operation (%s)",
+ StrBool(GetGraph()->HasMonitorOperations()),
+ StrBool(flag_info_.seen_monitor_operation)));
+ }
+
+ if (GetGraph()->HasTryCatch() != flag_info_.seen_try_boundary) {
+ AddError(
+ StringPrintf("Flag mismatch: HasTryCatch() (%s) should be equal to "
+ "flag_info_.seen_try_boundary (%s)",
+ StrBool(GetGraph()->HasTryCatch()),
+ StrBool(flag_info_.seen_try_boundary)));
+ }
+
+ if (GetGraph()->HasLoops() != flag_info_.seen_loop) {
+ AddError(
+ StringPrintf("Flag mismatch: HasLoops() (%s) should be equal to "
+ "flag_info_.seen_loop (%s)",
+ StrBool(GetGraph()->HasLoops()),
+ StrBool(flag_info_.seen_loop)));
+ }
+
+ if (GetGraph()->HasIrreducibleLoops() && !GetGraph()->HasLoops()) {
+ AddError(StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) implies HasLoops() (%s)",
+ StrBool(GetGraph()->HasIrreducibleLoops()),
+ StrBool(GetGraph()->HasLoops())));
+ }
+
+ if (GetGraph()->HasIrreducibleLoops() != flag_info_.seen_irreducible_loop) {
+ AddError(
+ StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) should be equal to "
+ "flag_info_.seen_irreducible_loop (%s)",
+ StrBool(GetGraph()->HasIrreducibleLoops()),
+ StrBool(flag_info_.seen_irreducible_loop)));
+ }
+
+ if (GetGraph()->HasSIMD() != flag_info_.seen_SIMD) {
+ AddError(
+ StringPrintf("Flag mismatch: HasSIMD() (%s) should be equal to "
+ "flag_info_.seen_SIMD (%s)",
+ StrBool(GetGraph()->HasSIMD()),
+ StrBool(flag_info_.seen_SIMD)));
+ }
+
+ if (GetGraph()->HasBoundsChecks() != flag_info_.seen_bounds_checks) {
+ AddError(
+ StringPrintf("Flag mismatch: HasBoundsChecks() (%s) should be equal to "
+ "flag_info_.seen_bounds_checks (%s)",
+ StrBool(GetGraph()->HasBoundsChecks()),
+ StrBool(flag_info_.seen_bounds_checks)));
+ }
+
+ if (GetGraph()->HasAlwaysThrowingInvokes() != flag_info_.seen_always_throwing_invokes) {
+ AddError(
+ StringPrintf("Flag mismatch: HasAlwaysThrowingInvokes() (%s) should be equal to "
+ "flag_info_.seen_always_throwing_invokes (%s)",
+ StrBool(GetGraph()->HasAlwaysThrowingInvokes()),
+ StrBool(flag_info_.seen_always_throwing_invokes)));
+ }
+}
+
void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
current_block_ = block;
@@ -159,6 +241,24 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
}
+ // Make sure the first instruction of a catch block is always a Nop that emits an environment.
+ if (block->IsCatchBlock()) {
+ if (!block->GetFirstInstruction()->IsNop()) {
+ AddError(StringPrintf("Block %d doesn't have a Nop as its first instruction.",
+ current_block_->GetBlockId()));
+ } else {
+ HNop* nop = block->GetFirstInstruction()->AsNop();
+ if (!nop->NeedsEnvironment()) {
+ AddError(
+ StringPrintf("%s:%d is a Nop and the first instruction of block %d, but it doesn't "
+ "need an environment.",
+ nop->DebugName(),
+ nop->GetId(),
+ current_block_->GetBlockId()));
+ }
+ }
+ }
+
// Visit this block's list of phis.
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
@@ -219,6 +319,12 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
}
+ // Ensure all blocks have at least one successor, except the Exit block.
+ if (block->GetSuccessors().empty() && !block->IsExitBlock()) {
+ AddError(StringPrintf("Block %d has no successor and it is not the Exit block.",
+ block->GetBlockId()));
+ }
+
// Ensure there is no critical edge (i.e., an edge connecting a
// block with multiple successors to a block with multiple
// predecessors). Exceptional edges are synthesized and hence
@@ -291,27 +397,30 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
+ VisitInstruction(check);
+
if (!GetGraph()->HasBoundsChecks()) {
- AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, "
- "but HasBoundsChecks() returns false",
- check->DebugName(),
- check->GetId()));
+ AddError(
+ StringPrintf("The graph doesn't have the HasBoundsChecks flag set but we saw "
+ "%s:%d in block %d.",
+ check->DebugName(),
+ check->GetId(),
+ check->GetBlock()->GetBlockId()));
}
- // Perform the instruction base checks too.
- VisitInstruction(check);
+ flag_info_.seen_bounds_checks = true;
}
void GraphChecker::VisitDeoptimize(HDeoptimize* deopt) {
+ VisitInstruction(deopt);
if (GetGraph()->IsCompilingOsr()) {
AddError(StringPrintf("A graph compiled OSR cannot have a HDeoptimize instruction"));
}
-
- // Perform the instruction base checks too.
- VisitInstruction(deopt);
}
void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
+ VisitInstruction(try_boundary);
+
ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers();
// Ensure that all exception handlers are catch blocks.
@@ -338,24 +447,65 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
}
}
- VisitInstruction(try_boundary);
+ if (!GetGraph()->HasTryCatch()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasTryCatch flag set but we saw "
+ "%s:%d in block %d.",
+ try_boundary->DebugName(),
+ try_boundary->GetId(),
+ try_boundary->GetBlock()->GetBlockId()));
+ }
+
+ flag_info_.seen_try_boundary = true;
+}
+
+void GraphChecker::VisitLoadClass(HLoadClass* load) {
+ VisitInstruction(load);
+
+ if (load->GetLoadedClassRTI().IsValid() && !load->GetLoadedClassRTI().IsExact()) {
+ std::stringstream ssRTI;
+ ssRTI << load->GetLoadedClassRTI();
+ AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.",
+ load->DebugName(),
+ load->GetId(),
+ load->GetBlock()->GetBlockId(),
+ ssRTI.str().c_str()));
+ }
}
void GraphChecker::VisitLoadException(HLoadException* load) {
- // Ensure that LoadException is the first instruction in a catch block.
+ VisitInstruction(load);
+
+ // Ensure that LoadException is the second instruction in a catch block. The first one should be a
+ // Nop (checked separately).
if (!load->GetBlock()->IsCatchBlock()) {
AddError(StringPrintf("%s:%d is in a non-catch block %d.",
load->DebugName(),
load->GetId(),
load->GetBlock()->GetBlockId()));
- } else if (load->GetBlock()->GetFirstInstruction() != load) {
- AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+ } else if (load->GetBlock()->GetFirstInstruction()->GetNext() != load) {
+ AddError(StringPrintf("%s:%d is not the second instruction in catch block %d.",
load->DebugName(),
load->GetId(),
load->GetBlock()->GetBlockId()));
}
}
+void GraphChecker::VisitMonitorOperation(HMonitorOperation* monitor_op) {
+ VisitInstruction(monitor_op);
+
+ if (!GetGraph()->HasMonitorOperations()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasMonitorOperations flag set but we saw "
+ "%s:%d in block %d.",
+ monitor_op->DebugName(),
+ monitor_op->GetId(),
+ monitor_op->GetBlock()->GetBlockId()));
+ }
+
+ flag_info_.seen_monitor_operation = true;
+}
+
void GraphChecker::VisitInstruction(HInstruction* instruction) {
if (seen_ids_.IsBitSet(instruction->GetId())) {
AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -497,33 +647,16 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
}
- // Ensure that reference type instructions have reference type info.
- if (check_reference_type_info_ && instruction->GetType() == DataType::Type::kReference) {
- if (!instruction->GetReferenceTypeInfo().IsValid()) {
- AddError(StringPrintf("Reference type instruction %s:%d does not have "
- "valid reference type information.",
- instruction->DebugName(),
- instruction->GetId()));
- }
- }
-
if (instruction->CanThrow() && !instruction->HasEnvironment()) {
AddError(StringPrintf("Throwing instruction %s:%d in block %d does not have an environment.",
instruction->DebugName(),
instruction->GetId(),
current_block_->GetBlockId()));
} else if (instruction->CanThrowIntoCatchBlock()) {
- // Find the top-level environment. This corresponds to the environment of
- // the catch block since we do not inline methods with try/catch.
- HEnvironment* environment = instruction->GetEnvironment();
- while (environment->GetParent() != nullptr) {
- environment = environment->GetParent();
- }
-
- // Find all catch blocks and test that `instruction` has an environment
- // value for each one.
+ // Find all catch blocks and test that `instruction` has an environment value for each one.
const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+ const HEnvironment* environment = catch_block->GetFirstInstruction()->GetEnvironment();
for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
HPhi* catch_phi = phi_it.Current()->AsPhi();
if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
@@ -541,9 +674,26 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
}
-void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+void GraphChecker::VisitInvoke(HInvoke* invoke) {
VisitInstruction(invoke);
+ if (invoke->AlwaysThrows()) {
+ if (!GetGraph()->HasAlwaysThrowingInvokes()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasAlwaysThrowingInvokes flag set but we saw "
+ "%s:%d in block %d and it always throws.",
+ invoke->DebugName(),
+ invoke->GetId(),
+ invoke->GetBlock()->GetBlockId()));
+ }
+ flag_info_.seen_always_throwing_invokes = true;
+ }
+}
+
+void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ // We call VisitInvoke and not VisitInstruction to de-duplicate the always throwing code check.
+ VisitInvoke(invoke);
+
if (invoke->IsStaticWithExplicitClinitCheck()) {
const HInstruction* last_input = invoke->GetInputs().back();
if (last_input == nullptr) {
@@ -612,6 +762,17 @@ void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
VisitInstruction(check);
+
+ if (check->GetTargetClassRTI().IsValid() && !check->GetTargetClassRTI().IsExact()) {
+ std::stringstream ssRTI;
+ ssRTI << check->GetTargetClassRTI();
+ AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.",
+ check->DebugName(),
+ check->GetId(),
+ check->GetBlock()->GetBlockId(),
+ ssRTI.str().c_str()));
+ }
+
HInstruction* input = check->InputAt(1);
if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
if (!input->IsNullConstant()) {
@@ -674,13 +835,14 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
loop_information->GetPreHeader()->GetSuccessors().size()));
}
- if (loop_information->GetSuspendCheck() == nullptr) {
- AddError(StringPrintf(
- "Loop with header %d does not have a suspend check.",
- loop_header->GetBlockId()));
+ if (!GetGraph()->SuspendChecksAreAllowedToNoOp() &&
+ loop_information->GetSuspendCheck() == nullptr) {
+ AddError(StringPrintf("Loop with header %d does not have a suspend check.",
+ loop_header->GetBlockId()));
}
- if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+ if (!GetGraph()->SuspendChecksAreAllowedToNoOp() &&
+ loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
AddError(StringPrintf(
"Loop header %d does not have the loop suspend check as the first instruction.",
loop_header->GetBlockId()));
@@ -1051,6 +1213,21 @@ void GraphChecker::VisitNeg(HNeg* instruction) {
}
}
+void GraphChecker::VisitArraySet(HArraySet* instruction) {
+ VisitInstruction(instruction);
+
+ if (instruction->NeedsTypeCheck() !=
+ instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) {
+ AddError(
+ StringPrintf("%s %d has a flag mismatch. An ArraySet instruction can trigger a GC iff it "
+ "needs a type check. Needs type check: %s, Can trigger GC: %s",
+ instruction->DebugName(),
+ instruction->GetId(),
+ StrBool(instruction->NeedsTypeCheck()),
+ StrBool(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))));
+ }
+}
+
void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) {
VisitInstruction(op);
DataType::Type lhs_type = op->InputAt(0)->GetType();
@@ -1111,6 +1288,8 @@ void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) {
}
void GraphChecker::VisitConstant(HConstant* instruction) {
+ VisitInstruction(instruction);
+
HBasicBlock* block = instruction->GetBlock();
if (!block->IsEntryBlock()) {
AddError(StringPrintf(
@@ -1149,6 +1328,18 @@ void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) {
void GraphChecker::VisitVecOperation(HVecOperation* instruction) {
VisitInstruction(instruction);
+
+ if (!GetGraph()->HasSIMD()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasSIMD flag set but we saw "
+ "%s:%d in block %d.",
+ instruction->DebugName(),
+ instruction->GetId(),
+ instruction->GetBlock()->GetBlockId()));
+ }
+
+ flag_info_.seen_SIMD = true;
+
if (codegen_ == nullptr) {
return;
}
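The visitor changes in graph_checker.cc above all follow one pattern: each visit records that a construct was seen and reports an error if the graph does not already declare the matching Has* flag. A minimal standalone sketch of that invariant, using made-up names rather than ART's actual GraphChecker API:

// Sketch only: illustrates the "declared flag must cover observed construct"
// pattern used by the checks above. Names below are hypothetical.
#include <cstdio>

struct GraphFlags {
  bool has_try_catch = false;
  bool has_monitor_operations = false;
};

struct SeenFlags {
  bool seen_try_boundary = false;
  bool seen_monitor_operation = false;
};

// Returns false (and prints an error) if a construct was observed that the
// graph does not declare via its flag.
bool CheckFlagCoverage(const GraphFlags& declared, const SeenFlags& seen) {
  bool ok = true;
  if (seen.seen_try_boundary && !declared.has_try_catch) {
    std::printf("error: saw a TryBoundary but HasTryCatch is false\n");
    ok = false;
  }
  if (seen.seen_monitor_operation && !declared.has_monitor_operations) {
    std::printf("error: saw a MonitorOperation but HasMonitorOperations is false\n");
    ok = false;
  }
  return ok;
}

int main() {
  GraphFlags declared;            // nothing declared
  SeenFlags seen;
  seen.seen_try_boundary = true;  // but a TryBoundary was visited
  return CheckFlagCoverage(declared, seen) ? 0 : 1;
}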
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 04c8d2103c..d6644f3b50 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -21,10 +21,11 @@
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
@@ -54,6 +55,7 @@ class GraphChecker : public HGraphDelegateVisitor {
void VisitInstruction(HInstruction* instruction) override;
void VisitPhi(HPhi* phi) override;
+ void VisitArraySet(HArraySet* instruction) override;
void VisitBinaryOperation(HBinaryOperation* op) override;
void VisitBooleanNot(HBooleanNot* instruction) override;
void VisitBoundType(HBoundType* instruction) override;
@@ -64,8 +66,11 @@ class GraphChecker : public HGraphDelegateVisitor {
void VisitDeoptimize(HDeoptimize* instruction) override;
void VisitIf(HIf* instruction) override;
void VisitInstanceOf(HInstanceOf* check) override;
+ void VisitInvoke(HInvoke* invoke) override;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override;
+ void VisitLoadClass(HLoadClass* load) override;
void VisitLoadException(HLoadException* load) override;
+ void VisitMonitorOperation(HMonitorOperation* monitor_operation) override;
void VisitNeg(HNeg* instruction) override;
void VisitPackedSwitch(HPackedSwitch* instruction) override;
void VisitReturn(HReturn* ret) override;
@@ -102,15 +107,6 @@ class GraphChecker : public HGraphDelegateVisitor {
}
}
- // Enable/Disable the reference type info check.
- //
- // Return: the previous status of the check.
- bool SetRefTypeInfoCheckEnabled(bool value = true) {
- bool old_value = check_reference_type_info_;
- check_reference_type_info_ = value;
- return old_value;
- }
-
protected:
// Report a new error.
void AddError(const std::string& error) {
@@ -123,18 +119,30 @@ class GraphChecker : public HGraphDelegateVisitor {
ArenaVector<std::string> errors_;
private:
+ void VisitReversePostOrder();
+
+ // Checks that the graph's flags are set correctly.
+ void CheckGraphFlags();
+
// String displayed before dumped errors.
const char* const dump_prefix_;
ScopedArenaAllocator allocator_;
ArenaBitVector seen_ids_;
- // Whether to perform the reference type info check for instructions which use or produce
- // object references, e.g. HNewInstance, HLoadClass.
- // The default value is true.
- bool check_reference_type_info_ = true;
// Used to access target information.
CodeGenerator* codegen_;
+ struct FlagInfo {
+ bool seen_try_boundary = false;
+ bool seen_monitor_operation = false;
+ bool seen_loop = false;
+ bool seen_irreducible_loop = false;
+ bool seen_SIMD = false;
+ bool seen_bounds_checks = false;
+ bool seen_always_throwing_invokes = false;
+ };
+ FlagInfo flag_info_;
+
DISALLOW_COPY_AND_ASSIGN(GraphChecker);
};
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 08bfa5d80f..b256fbb46d 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -14,12 +14,13 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
-class GraphCheckerTest : public OptimizingUnitTest {
+class GraphCheckerTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
HGraph* CreateSimpleCFG();
void TestCode(const std::vector<uint16_t>& data);
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 29af808731..b5d712736f 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -22,7 +23,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
class GraphTest : public OptimizingUnitTest {
protected:
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4a6ee13005..73bdd1e223 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -43,7 +43,7 @@
#include "ssa_liveness_analysis.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
// Unique pass-name to identify that the dump is for printing to log.
constexpr const char* kDebugDumpName = "debug";
@@ -480,12 +480,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< array_set->GetValueCanBeNull() << std::noboolalpha;
StartAttributeStream("needs_type_check") << std::boolalpha
<< array_set->NeedsTypeCheck() << std::noboolalpha;
+ StartAttributeStream("can_trigger_gc")
+ << std::boolalpha << array_set->GetSideEffects().Includes(SideEffects::CanTriggerGC())
+ << std::noboolalpha;
+ StartAttributeStream("write_barrier_kind") << array_set->GetWriteBarrierKind();
}
void VisitCompare(HCompare* compare) override {
StartAttributeStream("bias") << compare->GetBias();
}
+ void VisitCondition(HCondition* condition) override {
+ StartAttributeStream("bias") << condition->GetBias();
+ }
+
void VisitInvoke(HInvoke* invoke) override {
StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index;
ArtMethod* method = invoke->GetResolvedMethod();
@@ -549,7 +557,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iset->GetFieldType();
- StartAttributeStream("predicated") << std::boolalpha << iset->GetIsPredicatedSet();
+ StartAttributeStream("predicated")
+ << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha;
+ StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind();
}
void VisitStaticFieldGet(HStaticFieldGet* sget) override {
@@ -564,6 +574,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << sset->GetFieldType();
+ StartAttributeStream("write_barrier_kind") << sset->GetWriteBarrierKind();
}
void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override {
@@ -757,15 +768,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
instruction->IsCheckCast()) {
StartAttributeStream("klass") << "unresolved";
} else {
- // The NullConstant may be added to the graph during other passes that happen between
- // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
- // doesn't run or doesn't inline anything, the NullConstant remains untyped.
- // So we should check NullConstants for validity only after reference type propagation.
- DCHECK(graph_in_bad_state_ ||
- IsDebugDump() ||
- (!is_after_pass_ && IsPass(HGraphBuilder::kBuilderPassName)))
- << instruction->DebugName() << instruction->GetId() << " has invalid rti "
- << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
+ StartAttributeStream("klass") << "invalid";
}
}
if (disasm_info_ != nullptr) {
@@ -904,6 +907,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
if (block->IsCatchBlock()) {
PrintProperty("flags", "catch_block");
+ } else if (block->IsTryBlock()) {
+ std::stringstream flags_properties;
+ flags_properties << "try_start "
+ << namer_.GetName(block->GetTryCatchInformation()->GetTryEntry().GetBlock());
+ PrintProperty("flags", flags_properties.str().c_str());
} else if (!IsDebugDump()) {
// Don't print useless information to logcat
PrintEmptyProperty("flags");
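The new attribute dumps above wrap booleans in std::boolalpha / std::noboolalpha so the trace prints true/false without changing how later values stream. A small self-contained illustration of that standard-library pattern (plain std::cout here, not the visualizer's StartAttributeStream):

#include <iostream>

int main() {
  bool needs_type_check = true;
  // Print the flag as "true"/"false", then restore the default so later
  // integer output is unaffected.
  std::cout << "needs_type_check " << std::boolalpha << needs_type_check
            << std::noboolalpha << "\n";
  std::cout << 1 << "\n";  // still prints as a number, not "true"
  return 0;
}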
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 3429c11cbd..9878917739 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -22,10 +22,11 @@
#include "arch/instruction_set.h"
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "base/value_object.h"
#include "block_namer.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c7cd223b51..a6ca057cfc 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -23,7 +23,7 @@
#include "base/utils.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
/**
* A ValueSet holds instructions that can replace other instructions. It is updated
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index bbf2265e98..df4e3a8dbf 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_GVN_H_
#define ART_COMPILER_OPTIMIZING_GVN_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 3bf4cc35ba..1eb6307cb1 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -17,12 +17,13 @@
#include "gvn.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
class GVNTest : public OptimizingUnitTest {};
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 3b5a2f1f9d..be6c268f5d 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -16,9 +16,10 @@
#include "induction_var_analysis.h"
+#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
-namespace art {
+namespace art HIDDEN {
/**
* Returns true if the from/to types denote a narrowing, integral conversion (precision loss).
@@ -214,18 +215,25 @@ struct HInductionVarAnalysis::StackEntry {
size_t low_depth;
};
-HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name)
- : HOptimization(graph, name),
+HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph,
+ OptimizingCompilerStats* stats,
+ const char* name)
+ : HOptimization(graph, name, stats),
induction_(std::less<const HLoopInformation*>(),
graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)),
- cycles_(std::less<HPhi*>(),
- graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) {
+ cycles_(std::less<HPhi*>(), graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) {
}
bool HInductionVarAnalysis::Run() {
// Detects sequence variables (generalized induction variables) during an outer to inner
// traversal of all loops using Gerlek's algorithm. The order is important to enable
// range analysis on outer loop while visiting inner loops.
+
+ if (IsPathologicalCase()) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kNotVarAnalyzedPathological);
+ return false;
+ }
+
for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) {
// Don't analyze irreducible loops.
if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
@@ -1576,4 +1584,84 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) {
return "";
}
+void HInductionVarAnalysis::CalculateLoopHeaderPhisInARow(
+ HPhi* initial_phi,
+ ScopedArenaSafeMap<HPhi*, int>& cached_values,
+ ScopedArenaAllocator& allocator) {
+ DCHECK(initial_phi->IsLoopHeaderPhi());
+ ScopedArenaQueue<HPhi*> worklist(allocator.Adapter(kArenaAllocInductionVarAnalysis));
+ worklist.push(initial_phi);
+ // Tracks which phis are part of the chain currently being checked.
+ ScopedArenaSet<HPhi*> phis_in_chain(allocator.Adapter(kArenaAllocInductionVarAnalysis));
+ while (!worklist.empty()) {
+ HPhi* current_phi = worklist.front();
+ DCHECK(current_phi->IsLoopHeaderPhi());
+ if (cached_values.find(current_phi) != cached_values.end()) {
+ // Already processed.
+ worklist.pop();
+ continue;
+ }
+
+ phis_in_chain.insert(current_phi);
+ int max_value = 0;
+ bool pushed_other_phis = false;
+ for (size_t index = 0; index < current_phi->InputCount(); index++) {
+ // If the input is not a loop header phi, we only have 1 (current_phi).
+ int current_value = 1;
+ if (current_phi->InputAt(index)->IsLoopHeaderPhi()) {
+ HPhi* loop_header_phi = current_phi->InputAt(index)->AsPhi();
+ auto it = cached_values.find(loop_header_phi);
+ if (it != cached_values.end()) {
+ current_value += it->second;
+ } else if (phis_in_chain.find(current_phi) == phis_in_chain.end()) {
+ // Push phis which aren't in the chain already to be processed.
+ pushed_other_phis = true;
+ worklist.push(loop_header_phi);
+ }
+ // Phis in the chain will get processed later. We keep `current_value` as 1 to avoid
+ // double counting `loop_header_phi`.
+ }
+ max_value = std::max(max_value, current_value);
+ }
+
+ if (!pushed_other_phis) {
+ // Only finish processing after all inputs were processed.
+ worklist.pop();
+ phis_in_chain.erase(current_phi);
+ cached_values.FindOrAdd(current_phi, max_value);
+ }
+ }
+}
+
+bool HInductionVarAnalysis::IsPathologicalCase() {
+ ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
+ ScopedArenaSafeMap<HPhi*, int> cached_values(
+ std::less<HPhi*>(), local_allocator.Adapter(kArenaAllocInductionVarAnalysis));
+
+ // Due to how our induction passes work, we will take a lot of time compiling if we have several
+ // loop header phis in a row. If we have more than 15 different loop header phis in a row, we
+ // don't perform the analysis.
+ constexpr int kMaximumLoopHeaderPhisInARow = 15;
+
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ if (!block->IsLoopHeader()) {
+ continue;
+ }
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ DCHECK(it.Current()->IsLoopHeaderPhi());
+ HPhi* phi = it.Current()->AsPhi();
+ CalculateLoopHeaderPhisInARow(phi, cached_values, local_allocator);
+ DCHECK(cached_values.find(phi) != cached_values.end())
+ << " we should have a value for Phi " << phi->GetId()
+ << " in block " << phi->GetBlock()->GetBlockId();
+ if (cached_values.find(phi)->second > kMaximumLoopHeaderPhisInARow) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
} // namespace art
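IsPathologicalCase bails out of induction analysis when more than kMaximumLoopHeaderPhisInARow loop header phis feed one another. A simplified, self-contained sketch of the memoized chain-length idea follows; unlike the real pass it assumes the phi input edges form a DAG, whereas CalculateLoopHeaderPhisInARow additionally guards against cycles with a visiting set:

// Simplified sketch of the "loop header phis in a row" bail-out: compute the
// longest chain of phis feeding each other and refuse to analyze past a
// threshold. Phi ids and the input map are hypothetical stand-ins for HPhi.
#include <algorithm>
#include <unordered_map>
#include <vector>

using PhiId = int;
using PhiInputs = std::unordered_map<PhiId, std::vector<PhiId>>;

int ChainLength(PhiId phi, const PhiInputs& inputs,
                std::unordered_map<PhiId, int>& memo) {
  auto it = memo.find(phi);
  if (it != memo.end()) return it->second;
  int best = 1;  // the phi itself
  auto in_it = inputs.find(phi);
  if (in_it != inputs.end()) {
    for (PhiId input : in_it->second) {
      best = std::max(best, 1 + ChainLength(input, inputs, memo));
    }
  }
  memo[phi] = best;
  return best;
}

bool IsPathological(const PhiInputs& inputs, int max_in_a_row) {
  std::unordered_map<PhiId, int> memo;
  for (const auto& entry : inputs) {
    if (ChainLength(entry.first, inputs, memo) > max_in_a_row) return true;
  }
  return false;
}

int main() {
  // phi2 <- phi1 <- phi0: a chain of three loop header phis, well under 15.
  PhiInputs inputs = {{0, {}}, {1, {0}}, {2, {1}}};
  return IsPathological(inputs, /*max_in_a_row=*/15) ? 1 : 0;
}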
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 09417722da..050950089a 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -21,11 +21,12 @@
#include "base/arena_containers.h"
#include "base/array_ref.h"
+#include "base/macros.h"
#include "base/scoped_arena_containers.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Induction variable analysis. This class does not have a direct public API.
@@ -38,7 +39,9 @@ namespace art {
*/
class HInductionVarAnalysis : public HOptimization {
public:
- explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName);
+ explicit HInductionVarAnalysis(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kInductionPassName);
bool Run() override;
@@ -307,6 +310,15 @@ class HInductionVarAnalysis : public HOptimization {
static std::string FetchToString(HInstruction* fetch);
static std::string InductionToString(InductionInfo* info);
+ // Returns true if we have a pathological case we don't want to analyze.
+ bool IsPathologicalCase();
+ // Starting with `initial_phi`, calculates how many loop header phis we have in a row. To do
+ // this, we count the loop header phis which are used as inputs of other loop header phis. It
+ // uses `cached_values` to avoid recomputing results.
+ void CalculateLoopHeaderPhisInARow(HPhi* initial_phi,
+ ScopedArenaSafeMap<HPhi*, int>& cached_values,
+ ScopedArenaAllocator& allocator);
+
/**
* Maintains the results of the analysis as a mapping from loops to a mapping from instructions
* to the induction information for that instruction in that loop.
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 4c11ad4643..80c15371dc 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -17,12 +17,13 @@
#include <regex>
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "induction_var_analysis.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the InductionVarAnalysis tests.
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index ad3d1a9321..9b78699ead 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -17,8 +17,9 @@
#include "induction_var_range.h"
#include <limits>
+#include "optimizing/nodes.h"
-namespace art {
+namespace art HIDDEN {
/** Returns true if 64-bit constant fits in 32-bit constant. */
static bool CanLongValueFitIntoInt(int64_t c) {
@@ -1064,10 +1065,13 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context,
case HInductionVarAnalysis::kLinear:
if (*stride_value > 0) {
lower = nullptr;
+ return GenerateLastValueLinear(
+ context, loop, info, trip, graph, block, /*is_min=*/false, upper);
} else {
upper = nullptr;
+ return GenerateLastValueLinear(
+ context, loop, info, trip, graph, block, /*is_min=*/true, lower);
}
- break;
case HInductionVarAnalysis::kPolynomial:
return GenerateLastValuePolynomial(context, loop, info, trip, graph, block, lower);
case HInductionVarAnalysis::kGeometric:
@@ -1113,6 +1117,54 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context,
GenerateCode(context, loop, info, trip, graph, block, /*is_min=*/ false, upper);
}
+bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HInductionVarAnalysis::InductionInfo* trip,
+ HGraph* graph,
+ HBasicBlock* block,
+ bool is_min,
+ /*out*/ HInstruction** result) const {
+ DataType::Type type = info->type;
+ // Avoid any narrowing linear induction or any type mismatch between the linear induction and the
+ // trip count expression.
+ if (HInductionVarAnalysis::IsNarrowingLinear(info) || trip->type != type) {
+ return false;
+ }
+
+ // Stride value must be a known constant that fits into int32.
+ int64_t stride_value = 0;
+ if (!IsConstant(context, loop, info->op_a, kExact, &stride_value) ||
+ !CanLongValueFitIntoInt(stride_value)) {
+ return false;
+ }
+
+ // We require `a` to be a constant value that didn't overflow.
+ const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
+ Value val_a = GetVal(context, loop, trip, trip, is_min_a);
+ HInstruction* opb;
+ if (!IsConstantValue(val_a) ||
+ !GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) {
+ return false;
+ }
+
+ if (graph != nullptr) {
+ ArenaAllocator* allocator = graph->GetAllocator();
+ HInstruction* oper;
+ HInstruction* opa = graph->GetConstant(type, val_a.b_constant);
+ if (stride_value == 1) {
+ oper = new (allocator) HAdd(type, opa, opb);
+ } else if (stride_value == -1) {
+ oper = new (allocator) HSub(type, opb, opa);
+ } else {
+ HInstruction* mul = new (allocator) HMul(type, graph->GetConstant(type, stride_value), opa);
+ oper = new (allocator) HAdd(type, Insert(block, mul), opb);
+ }
+ *result = Insert(block, oper);
+ }
+ return true;
+}
+
bool InductionVarRange::GenerateLastValuePolynomial(const HBasicBlock* context,
const HLoopInformation* loop,
HInductionVarAnalysis::InductionInfo* info,
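For a linear induction i = initial + stride * k with a known trip count, GenerateLastValueLinear above materializes the closed-form last value directly as an Add/Sub/Mul chain. The arithmetic it encodes, with the InductionInfo operands reduced to plain integers for illustration (a sketch, not the range analysis itself):

#include <cstdint>
#include <cstdio>

// Last value of the linear induction i_k = initial + stride * k after `trip`
// iterations, i.e. the value the generated Add/Sub/Mul chain evaluates to.
int64_t LastValueLinear(int64_t initial, int64_t stride, int64_t trip) {
  if (stride == 1) return trip + initial;   // HAdd(trip, initial)
  if (stride == -1) return initial - trip;  // HSub(initial, trip)
  return stride * trip + initial;           // HAdd(HMul(stride, trip), initial)
}

int main() {
  // for (i = 0; i < 1000; ++i): last value generated for i is 1000.
  std::printf("%lld\n", static_cast<long long>(LastValueLinear(0, 1, 1000)));
  // for (i = 1000; i > 0; --i): last value is 1000 - 1000 = 0, matching the
  // simplified Sub(1000, 1000) expected by the updated range test.
  std::printf("%lld\n", static_cast<long long>(LastValueLinear(1000, -1, 1000)));
  return 0;
}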
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 552837c044..3e1212bec8 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_
#define ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_
+#include "base/macros.h"
#include "induction_var_analysis.h"
-namespace art {
+namespace art HIDDEN {
/**
* This class implements range analysis on expressions within loops. It takes the results
@@ -317,6 +318,15 @@ class InductionVarRange {
/*out*/ bool* needs_finite_test,
/*out*/ bool* needs_taken_test) const;
+ bool GenerateLastValueLinear(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HInductionVarAnalysis::InductionInfo* trip,
+ HGraph* graph,
+ HBasicBlock* block,
+ bool is_min,
+ /*out*/ HInstruction** result) const;
+
bool GenerateLastValuePolynomial(const HBasicBlock* context,
const HLoopInformation* loop,
HInductionVarAnalysis::InductionInfo* info,
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 962123d948..d879897959 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -17,12 +17,13 @@
#include "induction_var_range.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "induction_var_analysis.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
using Value = InductionVarRange::Value;
@@ -1064,10 +1065,6 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsSub());
ExpectInt(1000, last->InputAt(0));
- ASSERT_TRUE(last->InputAt(1)->IsNeg());
- last = last->InputAt(1)->InputAt(0);
- ASSERT_TRUE(last->IsSub());
- ExpectInt(0, last->InputAt(0));
ExpectInt(1000, last->InputAt(1));
// Loop logic.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f73c0d38e4..5a4478dc14 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -46,7 +46,7 @@
#include "thread.h"
#include "verifier/verifier_compiler_binding.h"
-namespace art {
+namespace art HIDDEN {
// Instruction limit to control memory.
static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
@@ -72,6 +72,9 @@ static constexpr size_t kMaximumNumberOfPolymorphicRecursiveCalls = 0;
// Controls the use of inline caches in AOT mode.
static constexpr bool kUseAOTInlineCaches = true;
+// Controls whether inlining of try/catch blocks is allowed.
+static constexpr bool kInlineTryCatches = true;
+
// We check for line numbers to make sure the DepthString implementation
// aligns the output nicely.
#define LOG_INTERNAL(msg) \
@@ -141,7 +144,11 @@ bool HInliner::Run() {
}
bool did_inline = false;
- bool did_set_always_throws = false;
+ // The inliner is the only phase that sets invokes as `always throwing`, and since we only run the
+ // inliner once per graph this value should always be false at the beginning of the inlining
+ // phase. This is important since we use `HasAlwaysThrowingInvokes` to know whether the inliner
+ // phase performed a relevant change in the graph.
+ DCHECK(!graph_->HasAlwaysThrowingInvokes());
// Initialize the number of instructions for the method being compiled. Recursive calls
// to HInliner::Run have already updated the instruction count.
@@ -175,14 +182,14 @@ bool HInliner::Run() {
HInstruction* next = instruction->GetNext();
HInvoke* call = instruction->AsInvoke();
// As long as the call is not intrinsified, it is worth trying to inline.
- if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
+ if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) {
if (honor_noinline_directives) {
// Debugging case: directives in method names control or assert on inlining.
std::string callee_name =
call->GetMethodReference().PrettyMethod(/* with_signature= */ false);
// Tests prevent inlining by having $noinline$ in their method names.
if (callee_name.find("$noinline$") == std::string::npos) {
- if (TryInline(call, &did_set_always_throws)) {
+ if (TryInline(call)) {
did_inline = true;
} else if (honor_inline_directives) {
bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos);
@@ -192,7 +199,7 @@ bool HInliner::Run() {
} else {
DCHECK(!honor_inline_directives);
// Normal case: try to inline.
- if (TryInline(call, &did_set_always_throws)) {
+ if (TryInline(call)) {
did_inline = true;
}
}
@@ -201,7 +208,9 @@ bool HInliner::Run() {
}
}
- return did_inline || did_set_always_throws;
+ // We return true if we either inlined at least one method, or we marked at least one invoke as
+ // always throwing.
+ return did_inline || graph_->HasAlwaysThrowingInvokes();
}
static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
@@ -436,7 +445,7 @@ static bool AlwaysThrows(ArtMethod* method)
return throw_seen;
}
-bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws) {
+bool HInliner::TryInline(HInvoke* invoke_instruction) {
MaybeRecordStat(stats_, MethodCompilationStat::kTryInline);
// Don't bother to move further if we know the method is unresolved or the invocation is
@@ -472,7 +481,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al
bool result = TryInlineAndReplace(invoke_instruction,
actual_method,
ReferenceTypeInfo::CreateInvalid(),
- /* do_rtp= */ true);
+ /* do_rtp= */ true,
+ /* is_speculative= */ false);
if (result) {
MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
if (outermost_graph_ == graph_) {
@@ -487,11 +497,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al
} else {
invoke_to_analyze = invoke_instruction;
}
- // Set always throws property for non-inlined method call with single
- // target.
- if (AlwaysThrows(actual_method)) {
- invoke_to_analyze->SetAlwaysThrows(true);
- *did_set_always_throws = true;
+ // Set always throws property for non-inlined method call with single target.
+ if (invoke_instruction->AlwaysThrows() || AlwaysThrows(actual_method)) {
+ invoke_to_analyze->SetAlwaysThrows(/* always_throws= */ true);
+ graph_->SetHasAlwaysThrowingInvokes(/* value= */ true);
}
}
return result;
@@ -499,10 +508,27 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al
DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
+ // No try catch inlining allowed here, or recursively. For try catch inlining we are banking on
+ // the fact that we have a unique dex pc list. We cannot guarantee that for some TryInline methods
+ // e.g. `TryInlinePolymorphicCall`.
+ // TODO(solanes): Setting `try_catch_inlining_allowed_` to false here covers all cases from
+ // `TryInlineFromCHA` and from `TryInlineFromInlineCache` as well (e.g.
+ // `TryInlinePolymorphicCall`). Reassess to see if we can inline try catch blocks in
+ // `TryInlineFromCHA`, `TryInlineMonomorphicCall` and `TryInlinePolymorphicCallToSameTarget`.
+
+ // We store the value to restore it since we will use the same HInliner instance for other inlinee
+ // candidates.
+ const bool previous_value = try_catch_inlining_allowed_;
+ try_catch_inlining_allowed_ = false;
+
if (TryInlineFromCHA(invoke_instruction)) {
+ try_catch_inlining_allowed_ = previous_value;
return true;
}
- return TryInlineFromInlineCache(invoke_instruction);
+
+ const bool result = TryInlineFromInlineCache(invoke_instruction);
+ try_catch_inlining_allowed_ = previous_value;
+ return result;
}
bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) {
@@ -518,7 +544,8 @@ bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) {
if (!TryInlineAndReplace(invoke_instruction,
method,
ReferenceTypeInfo::CreateInvalid(),
- /* do_rtp= */ true)) {
+ /* do_rtp= */ true,
+ /* is_speculative= */ true)) {
return false;
}
AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
@@ -786,7 +813,8 @@ bool HInliner::TryInlineMonomorphicCall(
if (!TryInlineAndReplace(invoke_instruction,
resolved_method,
ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true),
- /* do_rtp= */ false)) {
+ /* do_rtp= */ false,
+ /* is_speculative= */ true)) {
return false;
}
@@ -802,7 +830,6 @@ bool HInliner::TryInlineMonomorphicCall(
// Run type propagation to get the guard typed, and eventually propagate the
// type of the receiver.
ReferenceTypePropagation rtp_fixup(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false);
rtp_fixup.Run();
@@ -982,7 +1009,8 @@ bool HInliner::TryInlinePolymorphicCall(
!TryBuildAndInline(invoke_instruction,
method,
ReferenceTypeInfo::Create(handle, /* is_exact= */ true),
- &return_replacement)) {
+ &return_replacement,
+ /* is_speculative= */ true)) {
all_targets_inlined = false;
} else {
one_target_inlined = true;
@@ -1024,7 +1052,6 @@ bool HInliner::TryInlinePolymorphicCall(
// Run type propagation to get the guards typed.
ReferenceTypePropagation rtp_fixup(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false);
rtp_fixup.Run();
@@ -1160,7 +1187,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
if (!TryBuildAndInline(invoke_instruction,
actual_method,
ReferenceTypeInfo::CreateInvalid(),
- &return_replacement)) {
+ &return_replacement,
+ /* is_speculative= */ true)) {
return false;
}
@@ -1215,7 +1243,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
// Run type propagation to get the guard typed.
ReferenceTypePropagation rtp_fixup(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false);
rtp_fixup.Run();
@@ -1232,7 +1259,6 @@ void HInliner::MaybeRunReferenceTypePropagation(HInstruction* replacement,
// Actual return value has a more specific type than the method's declared
// return type. Run RTP again on the outer graph to propagate it.
ReferenceTypePropagation(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false).Run();
}
@@ -1246,6 +1272,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
return false;
}
+ // Don't try to devirtualize intrinsics as it breaks pattern matching from later phases.
+ // TODO(solanes): This `if` could be removed if we update optimizations like
+ // TryReplaceStringBuilderAppend.
+ if (invoke_instruction->IsIntrinsic()) {
+ return false;
+ }
+
// Don't bother trying to call directly a default conflict method. It
// doesn't have a proper MethodReference, but also `GetCanonicalMethod`
// will return an actual default implementation.
@@ -1288,7 +1321,8 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
dispatch_info,
kDirect,
MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInputsRef inputs = invoke_instruction->GetInputs();
DCHECK_EQ(inputs.size(), invoke_instruction->GetNumberOfArguments());
for (size_t index = 0; index != inputs.size(); ++index) {
@@ -1301,7 +1335,7 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (invoke_instruction->GetType() == DataType::Type::kReference) {
- new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+ new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo());
}
*replacement = new_invoke;
@@ -1316,11 +1350,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
- bool do_rtp) {
- DCHECK(!invoke_instruction->IsIntrinsic());
+ bool do_rtp,
+ bool is_speculative) {
+ DCHECK(!codegen_->IsImplementedIntrinsic(invoke_instruction));
HInstruction* return_replacement = nullptr;
- if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
+ if (!TryBuildAndInline(
+ invoke_instruction, method, receiver_type, &return_replacement, is_speculative)) {
return false;
}
@@ -1378,6 +1414,15 @@ bool HInliner::IsInliningAllowed(ArtMethod* method, const CodeItemDataAccessor&
return false;
}
+ if (annotations::MethodIsNeverInline(*method->GetDexFile(),
+ method->GetClassDef(),
+ method->GetDexMethodIndex())) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNeverInlineAnnotation)
+ << "Method " << method->PrettyMethod()
+ << " has the @NeverInline annotation so it won't be inlined";
+ return false;
+ }
+
return true;
}
@@ -1397,9 +1442,25 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction,
}
if (accessor.TriesSize() != 0) {
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee)
- << "Method " << method->PrettyMethod() << " is not inlined because of try block";
- return false;
+ if (!kInlineTryCatches) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchDisabled)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because inlining try catches is disabled globally";
+ return false;
+ }
+ const bool disallowed_try_catch_inlining =
+ // Direct parent is a try block.
+ invoke_instruction->GetBlock()->IsTryBlock() ||
+ // Indirect parent disallows try catch inlining.
+ !try_catch_inlining_allowed_;
+ if (disallowed_try_catch_inlining) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because it has a try catch and we are not supporting it for this"
+ << " particular call. This is could be because e.g. it would be inlined inside another"
+ << " try block, we arrived here from TryInlinePolymorphicCall, etc.";
+ return false;
+ }
}
if (invoke_instruction->IsInvokeStaticOrDirect() &&
@@ -1416,9 +1477,9 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction,
return true;
}
-// Returns whether our resource limits allow inlining this method.
-bool HInliner::IsInliningBudgetAvailable(ArtMethod* method,
- const CodeItemDataAccessor& accessor) const {
+bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
+ ArtMethod* method,
+ const CodeItemDataAccessor& accessor) const {
if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget)
<< "Method "
@@ -1438,13 +1499,21 @@ bool HInliner::IsInliningBudgetAvailable(ArtMethod* method,
return false;
}
+ if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because its block ends with a throw";
+ return false;
+ }
+
return true;
}
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement) {
+ HInstruction** return_replacement,
+ bool is_speculative) {
// If invoke_instruction is devirtualized to a different method, give intrinsics
// another chance before we try to inline it.
if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) {
@@ -1459,7 +1528,8 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->GetMethodReference(), // Use existing invoke's method's reference.
method,
MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
- method->GetMethodIndex());
+ method->GetMethodIndex(),
+ !graph_->IsDebuggable());
DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone);
HInputsRef inputs = invoke_instruction->GetInputs();
for (size_t index = 0; index != inputs.size(); ++index) {
@@ -1468,7 +1538,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (invoke_instruction->GetType() == DataType::Type::kReference) {
- new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+ new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo());
}
*return_replacement = new_invoke;
return true;
@@ -1503,12 +1573,12 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- if (!IsInliningBudgetAvailable(method, accessor)) {
+ if (!IsInliningEncouraged(invoke_instruction, method, accessor)) {
return false;
}
if (!TryBuildAndInlineHelper(
- invoke_instruction, method, receiver_type, return_replacement)) {
+ invoke_instruction, method, receiver_type, return_replacement, is_speculative)) {
return false;
}
@@ -1627,7 +1697,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
bool needs_constructor_barrier = false;
for (size_t i = 0; i != number_of_iputs; ++i) {
HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
- if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
+ if (!IsZeroBitPattern(value)) {
uint16_t field_index = iput_field_indexes[i];
bool is_final;
HInstanceFieldSet* iput =
@@ -1684,7 +1754,6 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
Handle<mirror::DexCache> dex_cache =
graph_->GetHandleCache()->NewHandle(referrer->GetDexCache());
ReferenceTypePropagation rtp(graph_,
- outer_compilation_unit_.GetClassLoader(),
dex_cache,
/* is_first_run= */ false);
rtp.Visit(iget);
@@ -1795,7 +1864,7 @@ void HInliner::SubstituteArguments(HGraph* callee_graph,
run_rtp = true;
current->SetReferenceTypeInfo(receiver_type);
} else {
- current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo());
+ current->SetReferenceTypeInfoIfValid(argument->GetReferenceTypeInfo());
}
current->AsParameterValue()->SetCanBeNull(argument->CanBeNull());
}
@@ -1807,7 +1876,6 @@ void HInliner::SubstituteArguments(HGraph* callee_graph,
// are more specific than the declared ones, run RTP again on the inner graph.
if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
ReferenceTypePropagation(callee_graph,
- outer_compilation_unit_.GetClassLoader(),
dex_compilation_unit.GetDexCache(),
/* is_first_run= */ false).Run();
}
@@ -1821,8 +1889,9 @@ void HInliner::SubstituteArguments(HGraph* callee_graph,
// If this function returns true, it will also set out_number_of_instructions to
// the number of instructions in the inlined body.
bool HInliner::CanInlineBody(const HGraph* callee_graph,
- const HBasicBlock* target_block,
- size_t* out_number_of_instructions) const {
+ HInvoke* invoke,
+ size_t* out_number_of_instructions,
+ bool is_speculative) const {
ArtMethod* const resolved_method = callee_graph->GetArtMethod();
HBasicBlock* exit_block = callee_graph->GetExitBlock();
@@ -1835,15 +1904,30 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
bool has_one_return = false;
for (HBasicBlock* predecessor : exit_block->GetPredecessors()) {
- if (predecessor->GetLastInstruction()->IsThrow()) {
- if (target_block->IsTryBlock()) {
- // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCaller)
+ const HInstruction* last_instruction = predecessor->GetLastInstruction();
+ // On inlinees, we can have Return/ReturnVoid/Throw -> TryBoundary -> Exit. To check for the
+ // actual last instruction, we have to skip the TryBoundary instruction.
+ if (last_instruction->IsTryBoundary()) {
+ predecessor = predecessor->GetSinglePredecessor();
+ last_instruction = predecessor->GetLastInstruction();
+
+ // If the last instruction chain is Return/ReturnVoid -> TryBoundary -> Exit we will have to
+ // split a critical edge in InlineInto and might recompute loop information, which is
+ // unsupported for irreducible loops.
+ if (!last_instruction->IsThrow() && graph_->HasIrreducibleLoops()) {
+ DCHECK(last_instruction->IsReturn() || last_instruction->IsReturnVoid());
+ // TODO(ngeoffray): Support re-computing loop information to graphs with
+ // irreducible loops?
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller)
<< "Method " << resolved_method->PrettyMethod()
- << " could not be inlined because one branch always throws and"
- << " caller is in a try/catch block";
+ << " could not be inlined because we will have to recompute the loop information and"
+ << " the caller has irreducible loops";
return false;
- } else if (graph_->GetExitBlock() == nullptr) {
+ }
+ }
+
+ if (last_instruction->IsThrow()) {
+ if (graph_->GetExitBlock() == nullptr) {
// TODO(ngeoffray): Support adding HExit in the caller graph.
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop)
<< "Method " << resolved_method->PrettyMethod()
@@ -1853,9 +1937,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
} else if (graph_->HasIrreducibleLoops()) {
// TODO(ngeoffray): Support re-computing loop information to graphs with
// irreducible loops?
- VLOG(compiler) << "Method " << resolved_method->PrettyMethod()
- << " could not be inlined because one branch always throws and"
- << " caller has irreducible loops";
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller)
+ << "Method " << resolved_method->PrettyMethod()
+ << " could not be inlined because one branch always throws and"
+ << " the caller has irreducible loops";
return false;
}
} else {
@@ -1864,6 +1949,15 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
}
if (!has_one_return) {
+ if (!is_speculative) {
+ // If we know that the method always throws with the particular parameters, set it as such.
+ // This is better than using the dex instructions as we have more information about this
+ // particular call. We don't mark speculative inlines (e.g. the ones from the inline cache) as
+ // always throwing since they might not throw when executed.
+ invoke->SetAlwaysThrows(/* always_throws= */ true);
+ graph_->SetHasAlwaysThrowingInvokes(/* value= */ true);
+ }
+
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedAlwaysThrows)
<< "Method " << resolved_method->PrettyMethod()
<< " could not be inlined because it always throws";
@@ -1882,7 +1976,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
if (block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoop)
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCallee)
<< "Method " << resolved_method->PrettyMethod()
<< " could not be inlined because it contains an irreducible loop";
return false;
@@ -1930,8 +2024,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
if (current->IsUnresolvedStaticFieldGet() ||
current->IsUnresolvedInstanceFieldGet() ||
current->IsUnresolvedStaticFieldSet() ||
- current->IsUnresolvedInstanceFieldSet()) {
- // Entrypoint for unresolved fields does not handle inlined frames.
+ current->IsUnresolvedInstanceFieldSet() ||
+ current->IsInvokeUnresolved()) {
+ // Unresolved invokes / field accesses are expensive at runtime when decoding inlining info,
+ // so don't inline methods that have them.
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedUnresolvedEntrypoint)
<< "Method " << resolved_method->PrettyMethod()
<< " could not be inlined because it is using an unresolved"
@@ -1964,7 +2060,8 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement) {
+ HInstruction** return_replacement,
+ bool is_speculative) {
DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid()));
const dex::CodeItem* code_item = resolved_method->GetCodeItem();
const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -2057,10 +2154,18 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
SubstituteArguments(callee_graph, invoke_instruction, receiver_type, dex_compilation_unit);
- RunOptimizations(callee_graph, code_item, dex_compilation_unit);
+ const bool try_catch_inlining_allowed_for_recursive_inline =
+ // It was allowed previously.
+ try_catch_inlining_allowed_ &&
+ // The current invoke is not inside a try block.
+ !invoke_instruction->GetBlock()->IsTryBlock();
+ RunOptimizations(callee_graph,
+ code_item,
+ dex_compilation_unit,
+ try_catch_inlining_allowed_for_recursive_inline);
size_t number_of_instructions = 0;
- if (!CanInlineBody(callee_graph, invoke_instruction->GetBlock(), &number_of_instructions)) {
+ if (!CanInlineBody(callee_graph, invoke_instruction, &number_of_instructions, is_speculative)) {
return false;
}
@@ -2095,16 +2200,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
void HInliner::RunOptimizations(HGraph* callee_graph,
const dex::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit) {
+ const DexCompilationUnit& dex_compilation_unit,
+ bool try_catch_inlining_allowed_for_recursive_inline) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
- HConstantFolding fold(callee_graph, "constant_folding$inliner");
+ HConstantFolding fold(callee_graph, inline_stats_, "constant_folding$inliner");
InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
HOptimization* optimizations[] = {
- &simplify,
&fold,
+ &simplify,
&dce,
};
@@ -2141,7 +2247,8 @@ void HInliner::RunOptimizations(HGraph* callee_graph,
total_number_of_dex_registers_ + accessor.RegistersSize(),
total_number_of_instructions_ + number_of_instructions,
this,
- depth_ + 1);
+ depth_ + 1,
+ try_catch_inlining_allowed_for_recursive_inline);
inliner.Run();
}
@@ -2155,6 +2262,10 @@ static bool IsReferenceTypeRefinement(ObjPtr<mirror::Class> declared_class,
}
ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
+ if (!actual_rti.IsValid()) {
+ return false;
+ }
+
ObjPtr<mirror::Class> actual_class = actual_rti.GetTypeHandle().Get();
return (actual_rti.IsExact() && !declared_is_exact) ||
(declared_class != actual_class && declared_class->IsAssignableFrom(actual_class));
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a2c2085e00..af067dae73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -17,13 +17,14 @@
#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_
+#include "base/macros.h"
#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "jit/profiling_info.h"
#include "optimization.h"
#include "profile/profile_compilation_info.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
@@ -42,7 +43,8 @@ class HInliner : public HOptimization {
size_t total_number_of_dex_registers,
size_t total_number_of_instructions,
HInliner* parent,
- size_t depth = 0,
+ size_t depth,
+ bool try_catch_inlining_allowed,
const char* name = kInlinerPassName)
: HOptimization(outer_graph, name, stats),
outermost_graph_(outermost_graph),
@@ -54,6 +56,7 @@ class HInliner : public HOptimization {
parent_(parent),
depth_(depth),
inlining_budget_(0),
+ try_catch_inlining_allowed_(try_catch_inlining_allowed),
inline_stats_(nullptr) {}
bool Run() override;
@@ -70,9 +73,7 @@ class HInliner : public HOptimization {
kInlineCacheMissingTypes = 5
};
- // We set `did_set_always_throws` as true if we analyzed `invoke_instruction` and it always
- // throws.
- bool TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws);
+ bool TryInline(HInvoke* invoke_instruction);
// Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
// reference type propagation can run after the inlining. If the inlining is successful, this
@@ -80,19 +81,22 @@ class HInliner : public HOptimization {
bool TryInlineAndReplace(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- bool do_rtp)
+ bool do_rtp,
+ bool is_speculative)
REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement)
+ HInstruction** return_replacement,
+ bool is_speculative)
REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement)
+ HInstruction** return_replacement,
+ bool is_speculative)
REQUIRES_SHARED(Locks::mutator_lock_);
// Substitutes parameters in the callee graph with their values from the caller.
@@ -105,8 +109,9 @@ class HInliner : public HOptimization {
// Run simple optimizations on `callee_graph`.
void RunOptimizations(HGraph* callee_graph,
const dex::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit)
- REQUIRES_SHARED(Locks::mutator_lock_);
+ const DexCompilationUnit& dex_compilation_unit,
+ bool try_catch_inlining_allowed_for_recursive_inline)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to recognize known simple patterns and replace invoke call with appropriate instructions.
bool TryPatternSubstitution(HInvoke* invoke_instruction,
@@ -129,12 +134,14 @@ class HInliner : public HOptimization {
const CodeItemDataAccessor& accessor) const
REQUIRES_SHARED(Locks::mutator_lock_);
- // Returns whether the inlining budget allows inlining method.
+ // Returns whether inlining is encouraged.
//
// For example, this checks whether the function has grown too large and
// inlining should be prevented.
- bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
- REQUIRES_SHARED(Locks::mutator_lock_);
+ bool IsInliningEncouraged(const HInvoke* invoke_instruction,
+ art::ArtMethod* method,
+ const CodeItemDataAccessor& accessor) const
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Inspects the body of a method (callee_graph) and returns whether it can be
// inlined.
@@ -142,8 +149,9 @@ class HInliner : public HOptimization {
// This checks for instructions and constructs that we do not support
// inlining, such as inlining a throw instruction into a try block.
bool CanInlineBody(const HGraph* callee_graph,
- const HBasicBlock* target_block,
- size_t* out_number_of_instructions) const
+ HInvoke* invoke,
+ size_t* out_number_of_instructions,
+ bool is_speculative) const
REQUIRES_SHARED(Locks::mutator_lock_);
// Create a new HInstanceFieldGet.
@@ -320,6 +328,9 @@ class HInliner : public HOptimization {
// The budget left for inlining, in number of instructions.
size_t inlining_budget_;
+ // Whether try-catch inlining is allowed at this particular instance of inlining.
+ bool try_catch_inlining_allowed_;
+
// Used to record stats about optimizations on the inlined graph.
// If the inlining is successful, these stats are merged to the caller graph's stats.
OptimizingCompilerStats* inline_stats_;
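A side note on the constructor change above: with the non-defaulted try_catch_inlining_allowed parameter inserted after depth, C++ no longer allows depth to keep its default argument (every parameter following a defaulted one must also have a default), so call sites now have to pass the depth explicitly. A minimal, self-contained illustration of the language rule (toy function; the names only echo the HInliner parameters and are not its real signature):

#include <cstddef>

// Ill-formed if uncommented: a defaulted parameter may not be followed by one without a default.
// void Configure(std::size_t depth = 0, bool try_catch_inlining_allowed);

// Well-formed: drop the default on depth (as the patch does for HInliner) while keeping the
// trailing defaulted parameter.
void Configure(std::size_t depth, bool try_catch_inlining_allowed, const char* name = "pass") {
  (void)depth;
  (void)try_catch_inlining_allowed;
  (void)name;
}

int main() {
  Configure(/*depth=*/ 0, /*try_catch_inlining_allowed=*/ true);
  return 0;
}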
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index e0bdd0963c..fee9091145 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -42,7 +42,7 @@
#include "ssa_builder.h"
#include "well_known_classes.h"
-namespace art {
+namespace art HIDDEN {
namespace {
@@ -343,6 +343,10 @@ static bool IsBlockPopulated(HBasicBlock* block) {
// Suspend checks were inserted into loop headers during building of dominator tree.
DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
return block->GetFirstInstruction() != block->GetLastInstruction();
+ } else if (block->IsCatchBlock()) {
+ // Nops were inserted into the beginning of catch blocks.
+ DCHECK(block->GetFirstInstruction()->IsNop());
+ return block->GetFirstInstruction() != block->GetLastInstruction();
} else {
return !block->GetInstructions().IsEmpty();
}
@@ -387,6 +391,11 @@ bool HInstructionBuilder::Build() {
// This is slightly odd because the loop header might not be empty (TryBoundary).
// But we're still creating the environment with locals from the top of the block.
InsertInstructionAtTop(suspend_check);
+ } else if (current_block_->IsCatchBlock()) {
+ // We add an environment emitting instruction at the beginning of each catch block, in order
+ // to support try catch inlining.
+ // This is slightly odd because the catch block might not be empty (TryBoundary).
+ InsertInstructionAtTop(new (allocator_) HNop(block_dex_pc, /* needs_environment= */ true));
}
if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
@@ -414,7 +423,7 @@ bool HInstructionBuilder::Build() {
}
if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
- AppendInstruction(new (allocator_) HNativeDebugInfo(dex_pc));
+ AppendInstruction(new (allocator_) HNop(dex_pc, /* needs_environment= */ true));
}
// Note: There may be no Thread for gtests.
@@ -460,6 +469,9 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
current_block_ = graph_->GetEntryBlock();
InitializeBlockLocals();
InitializeParameters();
+ if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ AppendInstruction(new (allocator_) HMethodEntryHook(0u));
+ }
AppendInstruction(new (allocator_) HGoto(0u));
// Fill the body.
@@ -495,14 +507,21 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
dispatch_info,
invoke_type,
target_method,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
// Add the return instruction.
if (return_type_ == DataType::Type::kVoid) {
+ if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), kNoDexPc));
+ }
AppendInstruction(new (allocator_) HReturnVoid());
} else {
+ if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ AppendInstruction(new (allocator_) HMethodExitHook(latest_result_, kNoDexPc));
+ }
AppendInstruction(new (allocator_) HReturn(latest_result_));
}
@@ -972,11 +991,11 @@ static ArtMethod* ResolveMethod(uint16_t method_idx,
*imt_or_vtable_index = resolved_method->GetVtableIndex();
} else if (*invoke_type == kInterface) {
// For HInvokeInterface we need the IMT index.
- *imt_or_vtable_index = ImTable::GetImtIndex(resolved_method);
+ *imt_or_vtable_index = resolved_method->GetImtIndex();
+ DCHECK_EQ(*imt_or_vtable_index, ImTable::GetImtIndex(resolved_method));
}
- *is_string_constructor =
- resolved_method->IsConstructor() && resolved_method->GetDeclaringClass()->IsStringClass();
+ *is_string_constructor = resolved_method->IsStringConstructor();
return resolved_method;
}
@@ -1041,7 +1060,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
dispatch_info,
invoke_type,
resolved_method_reference,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit,
+ !graph_->IsDebuggable());
return HandleStringInit(invoke, operands, shorty);
}
@@ -1054,7 +1074,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
}
// Try to build an HIR replacement for the intrinsic.
- if (UNLIKELY(resolved_method->IsIntrinsic())) {
+ if (UNLIKELY(resolved_method->IsIntrinsic()) && !graph_->IsDebuggable()) {
// All intrinsics are in the primary boot image, so their class can always be referenced
// and we do not need to rely on the implicit class initialization check. The class should
// be initialized but we do not require that here.
@@ -1105,7 +1125,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
dispatch_info,
invoke_type,
resolved_method_reference,
- clinit_check_requirement);
+ clinit_check_requirement,
+ !graph_->IsDebuggable());
if (clinit_check != nullptr) {
// Add the class initialization check as last input of `invoke`.
DCHECK_EQ(clinit_check_requirement, HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
@@ -1121,7 +1142,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
method_reference,
resolved_method,
resolved_method_reference,
- /*vtable_index=*/ imt_or_vtable_index);
+ /*vtable_index=*/ imt_or_vtable_index,
+ !graph_->IsDebuggable());
} else {
DCHECK_EQ(invoke_type, kInterface);
if (kIsDebugBuild) {
@@ -1142,7 +1164,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
resolved_method,
resolved_method_reference,
/*imt_index=*/ imt_or_vtable_index,
- load_kind);
+ load_kind,
+ !graph_->IsDebuggable());
}
return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
@@ -1341,12 +1364,14 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc,
method_reference,
resolved_method,
resolved_method_reference,
- proto_idx);
+ proto_idx,
+ !graph_->IsDebuggable());
if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) {
return false;
}
- if (invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke &&
+ if (invoke->GetIntrinsic() != Intrinsics::kNone &&
+ invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke &&
invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvokeExact &&
VarHandleAccessorNeedsReturnTypeCheck(invoke, return_type)) {
// Type check is needed because VarHandle intrinsics do not type check the retrieved reference.
@@ -1379,7 +1404,8 @@ bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc,
call_site_idx,
return_type,
dex_pc,
- method_reference);
+ method_reference,
+ !graph_->IsDebuggable());
return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 817fbaa9e8..3d65d8fb54 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
#include "base/array_ref.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type.h"
@@ -27,7 +28,7 @@
#include "handle.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class ArenaBitVector;
class ArtField;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 789f07786c..0c2fd5de56 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -31,13 +31,13 @@
#include "sharpening.h"
#include "string_builder_append.h"
-namespace art {
+namespace art HIDDEN {
// Whether to run an exhaustive test of individual HInstructions cloning when each instruction
// is replaced with its copy if it is clonable.
static constexpr bool kTestInstructionClonerExhaustively = false;
-class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
+class InstructionSimplifierVisitor final : public HGraphDelegateVisitor {
public:
InstructionSimplifierVisitor(HGraph* graph,
CodeGenerator* codegen,
@@ -970,7 +970,7 @@ void InstructionSimplifierVisitor::VisitPredicatedInstanceFieldGet(
pred_get->GetFieldInfo().GetDexFile(),
pred_get->GetDexPc());
if (pred_get->GetType() == DataType::Type::kReference) {
- replace_with->SetReferenceTypeInfo(pred_get->GetReferenceTypeInfo());
+ replace_with->SetReferenceTypeInfoIfValid(pred_get->GetReferenceTypeInfo());
}
pred_get->GetBlock()->InsertInstructionBefore(replace_with, pred_get);
pred_get->ReplaceWith(replace_with);
@@ -1117,6 +1117,10 @@ void InstructionSimplifierVisitor::VisitIf(HIf* instruction) {
}
}
+// TODO(solanes): This optimization should be in ConstantFolding since we are folding to a constant.
+// However, we get code size regressions when we do that because we sometimes have a NullCheck
+// between the HArrayLength and the HNewArray, and that NullCheck is eliminated in
+// InstructionSimplifier. If we ran ConstantFolding and InstructionSimplifier in lockstep, this
+// wouldn't be an issue.
void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) {
HInstruction* input = instruction->InputAt(0);
// If the array is a NewArray with constant size, replace the array length
@@ -1142,13 +1146,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
if (value->IsArrayGet()) {
if (value->AsArrayGet()->GetArray() == instruction->GetArray()) {
// If the code is just swapping elements in the array, no need for a type check.
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
}
if (value->IsNullConstant()) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
@@ -1160,13 +1164,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
}
if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
if (array_rti.IsObjectArray()) {
if (array_rti.IsExact()) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
instruction->SetStaticTypeOfArrayIsObjectArray();
@@ -1860,13 +1864,16 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
// Search HDiv having the specified dividend and divisor which is in the specified basic block.
// Return nullptr if nothing has been found.
-static HInstruction* FindDivWithInputsInBasicBlock(HInstruction* dividend,
- HInstruction* divisor,
- HBasicBlock* basic_block) {
+static HDiv* FindDivWithInputsInBasicBlock(HInstruction* dividend,
+ HInstruction* divisor,
+ HBasicBlock* basic_block) {
for (const HUseListNode<HInstruction*>& use : dividend->GetUses()) {
HInstruction* user = use.GetUser();
- if (user->GetBlock() == basic_block && user->IsDiv() && user->InputAt(1) == divisor) {
- return user;
+ if (user->GetBlock() == basic_block &&
+ user->IsDiv() &&
+ user->InputAt(0) == dividend &&
+ user->InputAt(1) == divisor) {
+ return user->AsDiv();
}
}
return nullptr;
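The added user->InputAt(0) == dividend check above guards against matching a division that only uses the dividend as its divisor, which can happen when the dividend and divisor of the Rem are the same value. A minimal standalone sketch (toy structs and names, not ART's HIR) of the matching rule:

#include <cassert>
#include <cstddef>

struct Node {
  Node* in0;
  Node* in1;
  bool is_div;
};

// Mirrors the fixed logic: a reusable quotient must be a Div with the dividend as its
// numerator (input 0) and the divisor as its denominator (input 1).
Node* FindDiv(Node* dividend, Node* divisor, Node* const* users, std::size_t n) {
  for (std::size_t i = 0; i != n; ++i) {
    Node* user = users[i];
    if (user->is_div && user->in0 == dividend && user->in1 == divisor) {
      return user;
    }
  }
  return nullptr;
}

int main() {
  Node a{nullptr, nullptr, /*is_div=*/ false};
  Node b{nullptr, nullptr, /*is_div=*/ false};
  Node div_b_a{&b, &a, /*is_div=*/ true};  // b / a: a use of `a`, but only as the divisor.
  Node* users_of_a[] = {&div_b_a};
  // For a % a (dividend == divisor == a), matching on input 1 alone would have returned
  // b / a as the quotient to reuse; requiring input 0 correctly finds nothing.
  assert(FindDiv(&a, &a, users_of_a, 1) == nullptr);
  return 0;
}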
@@ -1900,7 +1907,7 @@ void InstructionSimplifierVisitor::TryToReuseDiv(HRem* rem) {
}
}
- HInstruction* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block);
+ HDiv* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block);
if (quotient == nullptr) {
return;
}
@@ -2458,7 +2465,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
DCHECK(method != nullptr);
DCHECK(method->IsStatic());
DCHECK(method->GetDeclaringClass() == system);
- invoke->SetResolvedMethod(method);
+ invoke->SetResolvedMethod(method, !codegen_->GetGraph()->IsDebuggable());
// Sharpen the new invoke. Note that we do not update the dex method index of
// the invoke, as we would need to look it up in the current dex file, and it
// is unlikely that it exists. The most usual situation for such typed
@@ -2647,15 +2654,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Collect args and check for unexpected uses.
// We expect one call to a constructor with no arguments, one constructor fence (unless
// eliminated), some number of append calls and one call to StringBuilder.toString().
- bool constructor_inlined = false;
bool seen_constructor = false;
bool seen_constructor_fence = false;
bool seen_to_string = false;
uint32_t format = 0u;
uint32_t num_args = 0u;
+ bool has_fp_args = false;
HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order.
- // When inlining, `maybe_new_array` tracks an environment use that we want to allow.
- HInstruction* maybe_new_array = nullptr;
for (HBackwardInstructionIterator iter(block->GetInstructions()); !iter.Done(); iter.Advance()) {
HInstruction* user = iter.Current();
// Instructions of interest apply to `sb`, skip those that do not involve `sb`.
@@ -2700,6 +2705,14 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
case Intrinsics::kStringBuilderAppendLong:
arg = StringBuilderAppend::Argument::kLong;
break;
+ case Intrinsics::kStringBuilderAppendFloat:
+ arg = StringBuilderAppend::Argument::kFloat;
+ has_fp_args = true;
+ break;
+ case Intrinsics::kStringBuilderAppendDouble:
+ arg = StringBuilderAppend::Argument::kDouble;
+ has_fp_args = true;
+ break;
case Intrinsics::kStringBuilderAppendCharSequence: {
ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo();
if (!rti.IsValid()) {
@@ -2719,10 +2732,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
}
break;
}
- case Intrinsics::kStringBuilderAppendFloat:
- case Intrinsics::kStringBuilderAppendDouble:
- // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter().
- return false;
default: {
return false;
}
@@ -2736,25 +2745,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg);
args[num_args] = as_invoke_virtual->InputAt(1u);
++num_args;
- } else if (!seen_constructor) {
- // At this point, we should see the constructor. However, we might have inlined it so we have
- // to take care of both cases. We accept only the constructor with no extra arguments. This
- // means that if we inline it, we have to check it is setting its field to a new array.
- if (user->IsInvokeStaticOrDirect() &&
- user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
- user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
- user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
- constructor_inlined = false;
- } else if (user->IsInstanceFieldSet() &&
- user->AsInstanceFieldSet()->GetFieldType() == DataType::Type::kReference &&
- user->AsInstanceFieldSet()->InputAt(0) == sb &&
- user->AsInstanceFieldSet()->GetValue()->IsNewArray()) {
- maybe_new_array = user->AsInstanceFieldSet()->GetValue();
- constructor_inlined = true;
- } else {
- // We were expecting a constructor but we haven't seen it. Abort optimization.
- return false;
- }
+ } else if (user->IsInvokeStaticOrDirect() &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
+ user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
+ // After arguments, we should see the constructor.
+ // We accept only the constructor with no extra arguments.
+ DCHECK(!seen_constructor);
DCHECK(!seen_constructor_fence);
seen_constructor = true;
} else if (user->IsConstructorFence()) {
@@ -2780,17 +2777,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Accept only calls on the StringBuilder (which shall all be removed).
// TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)?
if (holder->InputCount() == 0 || holder->InputAt(0) != sb) {
- // When inlining the constructor, we have a NewArray and may have a LoadClass as an
- // environment use.
- if (constructor_inlined) {
- if (holder == maybe_new_array) {
- continue;
- }
- if (holder == maybe_new_array->InputAt(0)) {
- DCHECK(holder->IsLoadClass());
- continue;
- }
- }
return false;
}
}
@@ -2798,9 +2784,9 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Create replacement instruction.
HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format));
ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
- HStringBuilderAppend* append =
- new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc());
- append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo());
+ HStringBuilderAppend* append = new (allocator) HStringBuilderAppend(
+ fmt, num_args, has_fp_args, allocator, invoke->GetDexPc());
+ append->SetReferenceTypeInfoIfValid(invoke->GetReferenceTypeInfo());
for (size_t i = 0; i != num_args; ++i) {
append->SetArgumentAt(i, args[num_args - 1u - i]);
}
@@ -2824,33 +2810,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
while (sb->HasNonEnvironmentUses()) {
block->RemoveInstruction(sb->GetUses().front().GetUser());
}
- if (constructor_inlined) {
- // We need to remove the inlined constructor instructions,
- // and all remaining environment uses (if any).
- DCHECK(sb->HasEnvironmentUses());
- DCHECK(maybe_new_array != nullptr);
- DCHECK(maybe_new_array->IsNewArray());
- DCHECK(maybe_new_array->HasNonEnvironmentUses());
- HInstruction* fence = maybe_new_array->GetUses().front().GetUser();
- DCHECK(fence->IsConstructorFence());
- block->RemoveInstruction(fence);
- block->RemoveInstruction(maybe_new_array);
- if (sb->HasEnvironmentUses()) {
- // We know the only remaining uses are from the LoadClass.
- HInstruction* load_class = maybe_new_array->InputAt(0);
- DCHECK(load_class->IsLoadClass());
- for (HEnvironment* env = load_class->GetEnvironment();
- env != nullptr;
- env = env->GetParent()) {
- for (size_t i = 0, size = env->Size(); i != size; ++i) {
- if (env->GetInstructionAt(i) == sb) {
- env->RemoveAsUserOfInput(i);
- env->SetRawEnvAt(i, /*instruction=*/ nullptr);
- }
- }
- }
- }
- }
DCHECK(!sb->HasEnvironmentUses());
block->RemoveInstruction(sb);
return true;
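For reference, the format word built by the packing loop earlier in TryReplaceStringBuilderAppend (format = (format << kBitsPerArg) | arg) encodes one small code per append, and because the block is walked backwards the last append is folded in first, leaving the first append in the lowest bits. A short self-contained sketch of that packing for sb.append(someLong).append(someDouble); the constants below are placeholders, not the real StringBuilderAppend values:

#include <cstdint>

constexpr uint32_t kBitsPerArg = 4;   // stands in for StringBuilderAppend::kBitsPerArg
constexpr uint32_t kArgLong = 2;      // placeholder for Argument::kLong
constexpr uint32_t kArgDouble = 5;    // placeholder for Argument::kDouble (an FP arg, so has_fp_args is set)

constexpr uint32_t PackLongThenDouble() {
  uint32_t format = 0u;
  format = (format << kBitsPerArg) | kArgDouble;  // the double, appended last, is visited first
  format = (format << kBitsPerArg) | kArgLong;    // the long, appended first, is visited last
  return format;
}

static_assert(PackLongThenDouble() == ((kArgDouble << kBitsPerArg) | kArgLong),
              "the first append in program order lands in the low field, the second in the next field");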
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index feea771096..98ebaafebc 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -17,11 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
#include "optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 1371ea7781..05a518d544 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -23,7 +23,7 @@
#include "mirror/string.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
@@ -31,7 +31,7 @@ using helpers::IsSubRightSubLeftShl;
namespace arm {
-class InstructionSimplifierArmVisitor : public HGraphVisitor {
+class InstructionSimplifierArmVisitor final : public HGraphVisitor {
public:
InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
: HGraphVisitor(graph), stats_(stats) {}
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index fca9341d59..0517e4f49e 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class InstructionSimplifierArm : public HOptimization {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index a6ec02012c..671900bd9d 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -21,7 +21,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
@@ -31,7 +31,7 @@ namespace arm64 {
using helpers::ShifterOperandSupportsExtension;
-class InstructionSimplifierArm64Visitor : public HGraphVisitor {
+class InstructionSimplifierArm64Visitor final : public HGraphVisitor {
public:
InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats)
: HGraphVisitor(graph), stats_(stats) {}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 8d93c01ebf..374638ab9e 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
class InstructionSimplifierArm64 : public HOptimization {
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dc60ba62bb..34daae21ee 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -18,7 +18,7 @@
#include "mirror/array-inl.h"
-namespace art {
+namespace art HIDDEN {
namespace {
@@ -244,7 +244,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
// The access may require a runtime call or the original array pointer.
return false;
}
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
!kUseBakerReadBarrier &&
access->IsArrayGet() &&
access->GetType() == DataType::Type::kReference) {
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 876ed21a22..ddc3a867b8 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
namespace helpers {
diff --git a/compiler/optimizing/instruction_simplifier_test.cc b/compiler/optimizing/instruction_simplifier_test.cc
index c7c5b12e25..966f5b91cf 100644
--- a/compiler/optimizing/instruction_simplifier_test.cc
+++ b/compiler/optimizing/instruction_simplifier_test.cc
@@ -26,13 +26,15 @@
#include "optimizing/data_type.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
namespace mirror {
class ClassExt;
class Throwable;
} // namespace mirror
+static constexpr bool kDebugSimplifierTests = false;
+
template<typename SuperClass>
class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTestHelper {
public:
@@ -49,6 +51,19 @@ class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTe
SuperClass::TearDown();
gLogVerbosity.compiler = false;
}
+
+ void PerformSimplification(const AdjacencyListGraph& blks) {
+ if (kDebugSimplifierTests) {
+ LOG(INFO) << "Pre simplification " << blks;
+ }
+ graph_->ClearDominanceInformation();
+ graph_->BuildDominatorTree();
+ InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
+ simp.Run();
+ if (kDebugSimplifierTests) {
+ LOG(INFO) << "Post simplify " << blks;
+ }
+ }
};
class InstructionSimplifierTest : public InstructionSimplifierTestBase<CommonCompilerTest> {};
@@ -197,13 +212,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoMerge) {
SetupExit(exit);
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
EXPECT_INS_RETAINED(read_end);
@@ -289,13 +298,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetMerge) {
SetupExit(exit);
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
EXPECT_FALSE(obj3->CanBeNull());
EXPECT_INS_RETAINED(read_end);
@@ -373,13 +376,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoNull) {
SetupExit(exit);
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
EXPECT_FALSE(obj1->CanBeNull());
EXPECT_FALSE(obj2->CanBeNull());
@@ -464,16 +461,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassInstanceOfOther) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) {
EXPECT_INS_RETAINED(target_klass);
@@ -532,16 +520,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassCheckCastOther) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) {
EXPECT_INS_RETAINED(target_klass);
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc
index 2d8f94a85b..5a4345d589 100644
--- a/compiler/optimizing/instruction_simplifier_x86.cc
+++ b/compiler/optimizing/instruction_simplifier_x86.cc
@@ -17,11 +17,11 @@
#include "instruction_simplifier_x86_shared.h"
#include "code_generator_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
-class InstructionSimplifierX86Visitor : public HGraphVisitor {
+class InstructionSimplifierX86Visitor final : public HGraphVisitor {
public:
InstructionSimplifierX86Visitor(HGraph* graph,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h
index 6f10006db2..25ebe203b8 100644
--- a/compiler/optimizing/instruction_simplifier_x86.h
+++ b/compiler/optimizing/instruction_simplifier_x86.h
@@ -16,10 +16,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
namespace x86 {
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc
index 56c6b414d7..9ba1a8a960 100644
--- a/compiler/optimizing/instruction_simplifier_x86_64.cc
+++ b/compiler/optimizing/instruction_simplifier_x86_64.cc
@@ -17,11 +17,11 @@
#include "instruction_simplifier_x86_shared.h"
#include "code_generator_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
-class InstructionSimplifierX86_64Visitor : public HGraphVisitor {
+class InstructionSimplifierX86_64Visitor final : public HGraphVisitor {
public:
InstructionSimplifierX86_64Visitor(HGraph* graph,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h
index 6cae24d11a..1654dc4774 100644
--- a/compiler/optimizing/instruction_simplifier_x86_64.h
+++ b/compiler/optimizing/instruction_simplifier_x86_64.h
@@ -16,10 +16,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc
index 2805abb2bb..74c5ca2466 100644
--- a/compiler/optimizing/instruction_simplifier_x86_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc
@@ -14,9 +14,10 @@
*/
#include "instruction_simplifier_x86_shared.h"
+
#include "nodes_x86.h"
-namespace art {
+namespace art HIDDEN {
bool TryCombineAndNot(HAnd* instruction) {
DataType::Type type = instruction->GetType();
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h
index 7f94d7ea4c..1a44d0fdb5 100644
--- a/compiler/optimizing/instruction_simplifier_x86_shared.h
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.h
@@ -16,13 +16,16 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
+
bool TryCombineAndNot(HAnd* instruction);
bool TryGenerateResetLeastSetBit(HAnd* instruction);
bool TryGenerateMaskUptoLeastSetBit(HXor* instruction);
bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc
index 5f6f562161..7e542117a9 100644
--- a/compiler/optimizing/intrinsic_objects.cc
+++ b/compiler/optimizing/intrinsic_objects.cc
@@ -22,7 +22,7 @@
#include "image.h"
#include "obj_ptr-inl.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kIntrinsicObjectsOffset =
enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart);
diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h
index ed764bd4b2..d750f2934b 100644
--- a/compiler/optimizing/intrinsic_objects.h
+++ b/compiler/optimizing/intrinsic_objects.h
@@ -19,9 +19,10 @@
#include "base/bit_field.h"
#include "base/bit_utils.h"
+#include "base/macros.h"
#include "base/mutex.h"
-namespace art {
+namespace art HIDDEN {
class ClassLinker;
template <class MirrorType> class ObjPtr;
@@ -56,15 +57,15 @@ class IntrinsicObjects {
}
// Functions for retrieving data for Integer.valueOf().
- static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(
+ EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(
Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_);
- static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache(
+ EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
REQUIRES_SHARED(Locks::mutator_lock_);
- static ObjPtr<mirror::Object> GetIntegerValueOfObject(
+ EXPORT static ObjPtr<mirror::Object> GetIntegerValueOfObject(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
- static MemberOffset GetIntegerValueOfArrayDataOffset(
+ EXPORT static MemberOffset GetIntegerValueOfArrayDataOffset(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index f2d2b45da9..774deec438 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -32,7 +32,7 @@
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
-namespace art {
+namespace art HIDDEN {
std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
switch (intrinsic) {
@@ -171,6 +171,7 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
if (!CanReferenceBootImageObjects(invoke, compiler_options)) {
return;
}
+ HInstruction* const input = invoke->InputAt(0);
if (compiler_options.IsBootImage()) {
if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) ||
!compiler_options.IsImageClass(kIntegerDescriptor)) {
@@ -207,8 +208,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
CHECK_EQ(value_field->GetInt(current_object), low + i);
}
}
- if (invoke->InputAt(0)->IsIntConstant()) {
- int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
static_cast<uint32_t>(high - low + 1)) {
// No call, we shall use direct pointer to the Integer object.
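The unsigned subtract-and-compare above is the usual branch-free range check: after casting to uint32_t, a single comparison is equivalent to low <= value && value <= high. A standalone illustration (plain C++; the -128..127 bounds just mimic the default Integer cache range):

#include <cstdint>

constexpr bool InCacheRange(int32_t value, int32_t low, int32_t high) {
  return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
         static_cast<uint32_t>(high - low + 1);
}

static_assert(InCacheRange(-128, -128, 127), "lower bound is inside the range");
static_assert(InCacheRange(0, -128, 127), "interior value is inside the range");
static_assert(!InCacheRange(127 + 1, -128, 127), "one past the upper bound is rejected");
static_assert(!InCacheRange(-128 - 1, -128, 127), "one below the lower bound wraps around and is rejected");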
@@ -232,8 +233,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
} else {
DCHECK(compiler_options.IsAotCompiler());
DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache));
- if (invoke->InputAt(0)->IsIntConstant()) {
- int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
// Retrieve the `value` from the lowest cached Integer.
ObjPtr<mirror::Object> low_integer =
IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u);
@@ -255,11 +256,11 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
ArenaAllocator* allocator = codegen->GetGraph()->GetAllocator();
LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified);
if (call_kind == LocationSummary::kCallOnMainOnly) {
- locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0)));
+ locations->SetInAt(0, Location::RegisterOrConstant(input));
locations->AddTemp(first_argument_location);
locations->SetOut(return_location);
} else {
- locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(input));
locations->SetOut(Location::RequiresRegister());
}
}
@@ -392,7 +393,7 @@ void IntrinsicVisitor::CreateReferenceGetReferentLocations(HInvoke* invoke,
}
void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
// Unimplemented for non-Baker read barrier.
return;
}
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 5109882295..893cd04411 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,12 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#include "base/macros.h"
#include "code_generator.h"
#include "nodes.h"
#include "optimization.h"
#include "parallel_move_resolver.h"
-namespace art {
+namespace art HIDDEN {
class DexFile;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 646f4f2ea7..d2dbaa32e3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -46,7 +46,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces)
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
@@ -55,7 +55,6 @@ using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterOrZeroRegAt;
-using helpers::IsConstantZeroBitPattern;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
@@ -92,7 +91,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
: SlowPathCodeARM64(instruction), tmp_(tmp) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -711,7 +710,7 @@ static void GenUnsafeGet(HInvoke* invoke,
Location trg_loc = locations->Out();
Register trg = RegisterFrom(trg_loc, type);
- if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
Register temp = WRegisterFrom(locations->GetTemp(0));
MacroAssembler* masm = codegen->GetVIXLAssembler();
@@ -754,7 +753,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1096,7 +1095,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke)
}
static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke);
+ const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1448,7 +1447,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM6
vixl::aarch64::Label* exit_loop = &exit_loop_label;
vixl::aarch64::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
+ if (gUseReadBarrier && type == DataType::Type::kReference) {
// We need to store the `old_value` in a non-scratch register to make sure
// the read barrier in the slow path does not clobber it.
old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
@@ -1523,12 +1522,12 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* in
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
CreateUnsafeCASLocations(allocator_, invoke);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// We need two non-scratch temporary registers for read barrier.
LocationSummary* locations = invoke->GetLocations();
if (kUseBakerReadBarrier) {
@@ -1578,7 +1577,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invok
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
@@ -2576,9 +2575,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Bind(&done);
}
-// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
-// implementation there for longer copy lengths.
-static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
+// This value is greater than ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore,
+// so if we choose to jump to the slow path we will end up in the native implementation.
+static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
uint32_t at,
@@ -2710,11 +2709,13 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
__ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
}
- if (copy_length.IsConstant()) {
- int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
- __ Add(src_end, src_base, element_size * constant);
- } else {
- __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
+ if (src_end.IsValid()) {
+ if (copy_length.IsConstant()) {
+ int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+ __ Add(src_end, src_base, element_size * constant);
+ } else {
+ __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
+ }
}
}
@@ -2745,13 +2746,14 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
if (!length.IsConstant()) {
// Merge the following two comparisons into one:
// If the length is negative, bail out (delegate to libcore's native implementation).
- // If the length > 32 then (currently) prefer libcore's native implementation.
+ // If the length > kSystemArrayCopyCharThreshold then (currently) prefer libcore's
+ // native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
__ B(slow_path->GetEntryLabel(), hi);
} else {
// We have already checked in the LocationsBuilder for the constant case.
DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
- DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
+ DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), kSystemArrayCopyCharThreshold);
}
Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
@@ -2787,21 +2789,102 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
length,
src_curr_addr,
dst_curr_addr,
- src_stop_addr);
+ Register());
// Iterate over the arrays and do a raw copy of the chars.
const int32_t char_size = DataType::Size(DataType::Type::kUint16);
UseScratchRegisterScope temps(masm);
- Register tmp = temps.AcquireW();
- vixl::aarch64::Label loop, done;
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
- __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
- __ B(&loop);
- __ Bind(&done);
+ // We split processing of the array into two parts: head and tail.
+ // A first loop handles the head by copying a block of characters per
+ // iteration (see: chars_per_block).
+ // A second loop handles the tail by copying the remaining characters.
+ // If the copy length is not constant, we copy them one-by-one.
+ // If the copy length is constant, we optimize by always unrolling the tail
+ // loop, and also unrolling the head loop when the copy length is small (see:
+ // unroll_threshold).
+ //
+ // Both loops are inverted for better performance, meaning they are
+ // implemented as conditional do-while loops.
+ // Here, the loop condition is first checked to determine if there are
+ // sufficient chars to run an iteration, then we enter the do-while: an
+ // iteration is performed followed by a conditional branch only if another
+ // iteration is necessary. As opposed to a standard while-loop, this inversion
+ // can save some branching (e.g. we don't branch back to the initial condition
+ // at the end of every iteration only to potentially immediately branch
+ // again).
+ //
+ // A full block of chars is subtracted before the head loop and added back after it.
+ // This ensures that a non-negative remaining length after a head loop iteration
+ // implies at least one more full block is left to copy, reducing the number of
+ // conditional checks required on every iteration.
+ constexpr int32_t chars_per_block = 4;
+ constexpr int32_t unroll_threshold = 2 * chars_per_block;
+ vixl::aarch64::Label loop1, loop2, pre_loop2, done;
+
+ Register length_tmp = src_stop_addr.W();
+ Register tmp = temps.AcquireRegisterOfSize(char_size * chars_per_block * kBitsPerByte);
+
+ auto emitHeadLoop = [&]() {
+ __ Bind(&loop1);
+ __ Ldr(tmp, MemOperand(src_curr_addr, char_size * chars_per_block, PostIndex));
+ __ Subs(length_tmp, length_tmp, chars_per_block);
+ __ Str(tmp, MemOperand(dst_curr_addr, char_size * chars_per_block, PostIndex));
+ __ B(&loop1, ge);
+ };
+
+ auto emitTailLoop = [&]() {
+ __ Bind(&loop2);
+ __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
+ __ Subs(length_tmp, length_tmp, 1);
+ __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
+ __ B(&loop2, gt);
+ };
+
+ auto emitUnrolledTailLoop = [&](const int32_t tail_length) {
+ DCHECK_LT(tail_length, 4);
+
+ // Don't use post-index addressing, and instead add a constant offset later.
+ if ((tail_length & 2) != 0) {
+ __ Ldr(tmp.W(), MemOperand(src_curr_addr));
+ __ Str(tmp.W(), MemOperand(dst_curr_addr));
+ }
+ if ((tail_length & 1) != 0) {
+ const int32_t offset = (tail_length & ~1) * char_size;
+ __ Ldrh(tmp, MemOperand(src_curr_addr, offset));
+ __ Strh(tmp, MemOperand(dst_curr_addr, offset));
+ }
+ };
+
+ if (length.IsConstant()) {
+ const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
+ if (constant_length >= unroll_threshold) {
+ __ Mov(length_tmp, constant_length - chars_per_block);
+ emitHeadLoop();
+ } else {
+ static_assert(unroll_threshold == 8, "The unroll_threshold must be 8.");
+ // Fully unroll both the head and tail loops.
+ if ((constant_length & 4) != 0) {
+ __ Ldr(tmp, MemOperand(src_curr_addr, 4 * char_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, 4 * char_size, PostIndex));
+ }
+ }
+ emitUnrolledTailLoop(constant_length % chars_per_block);
+ } else {
+ Register length_reg = WRegisterFrom(length);
+ __ Subs(length_tmp, length_reg, chars_per_block);
+ __ B(&pre_loop2, lt);
+
+ emitHeadLoop();
+
+ __ Bind(&pre_loop2);
+ __ Adds(length_tmp, length_tmp, chars_per_block);
+ __ B(&done, eq);
+
+ emitTailLoop();
+ }
+
+ __ Bind(&done);
__ Bind(slow_path->GetExitLabel());
}
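A quick standalone check (plain C++, not ARM64 assembly; HeadChars and kCharsPerBlock are illustrative names) of how a constant copy length is split between the head loop and the unrolled tail, using the constants above (chars_per_block = 4, unroll_threshold = 8):

constexpr int kCharsPerBlock = 4;

// Mirrors the head loop: the counter starts at length - kCharsPerBlock and a block is
// copied for as long as the counter stays non-negative after each subtraction.
constexpr int HeadChars(int length) {
  int copied = 0;
  for (int remaining = length - kCharsPerBlock; remaining >= 0; remaining -= kCharsPerBlock) {
    copied += kCharsPerBlock;
  }
  return copied;
}

// Length 11 (>= unroll_threshold): two head iterations copy 8 chars; the unrolled tail
// handles 11 % 4 = 3 chars with one 32-bit load/store (2 chars) plus one halfword (1 char).
static_assert(HeadChars(11) == 8, "");
static_assert(11 % kCharsPerBlock == 3, "");

// Length 12: three head iterations copy everything and the tail has nothing to do.
static_assert(HeadChars(12) == 12, "");
static_assert(12 % kCharsPerBlock == 0, "");

// Length 6 (< unroll_threshold): the head loop is skipped; the (6 & 4) block copies
// 4 chars and the unrolled tail copies the remaining 6 % 4 = 2 chars.

For a non-constant length, the same split happens at run time: the Subs/Adds pair around the head loop plays the role of the block subtracted before and re-added after it, and the tail loop then copies the leftover chars one at a time.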
@@ -2814,7 +2897,7 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2866,7 +2949,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Temporary register IP0, obtained from the VIXL scratch register
// pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
// (because that register is clobbered by ReadBarrierMarkRegX
@@ -2884,7 +2967,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2991,7 +3074,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
UseScratchRegisterScope temps(masm);
Location temp3_loc; // Used only for Baker read barrier.
Register temp3;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
temp3_loc = locations->GetTemp(2);
temp3 = WRegisterFrom(temp3_loc);
} else {
@@ -3004,7 +3087,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
@@ -3165,7 +3248,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
temp1_loc,
@@ -3215,7 +3298,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ Cbz(WRegisterFrom(length), &done);
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -3335,7 +3418,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(dest.W(), Register(), /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -3451,7 +3534,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
+ if (gUseReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}
}
@@ -3466,7 +3549,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireW();
@@ -3493,7 +3576,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
WRegisterFrom(obj),
@@ -3533,7 +3616,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
__ Cmp(tmp, other);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
vixl::aarch64::Label calculate_result;
@@ -4629,7 +4712,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
method.X(),
ArtField::DeclaringClassOffset().Int32Value(),
/*fixup_label=*/ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
}
}
} else {
@@ -4673,8 +4756,8 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
HInstruction* arg = invoke->InputAt(arg_index);
- if (IsConstantZeroBitPattern(arg)) {
- locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant()));
+ if (IsZeroBitPattern(arg)) {
+ locations->SetInAt(arg_index, Location::ConstantLocation(arg));
} else if (DataType::IsFloatingPointType(arg->GetType())) {
locations->SetInAt(arg_index, Location::RequiresFpuRegister());
} else {
@@ -4683,7 +4766,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
}
// Add a temporary for offset.
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
// To preserve the offset value across the non-Baker read barrier slow path
// for loading the declaring class, use a fixed callee-save register.
@@ -4706,7 +4789,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) {
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
@@ -4746,7 +4829,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
DCHECK(use_load_acquire || order == std::memory_order_relaxed);
// Load the value from the target location.
- if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
// Piggy-back on the field load path using introspection for the Baker read barrier.
// The `target.offset` is a temporary, use it for field address.
Register tmp_ptr = target.offset.X();
@@ -4898,7 +4981,7 @@ static void GenerateVarHandleSet(HInvoke* invoke,
}
if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
- codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true);
+ codegen->MarkGCCard(target.object, Register(value), /* emit_null_check= */ true);
}
if (slow_path != nullptr) {
@@ -4947,7 +5030,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field. This breaks the read barriers
@@ -4961,7 +5044,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
// We need callee-save registers for both the class object and offset instead of
// the temporaries reserved in CreateVarHandleCommonLocations().
static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
@@ -4985,16 +5068,16 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
// Add a temporary for old value and exclusive store result if floating point
// `expected` and/or `new_value` take scratch registers.
size_t available_scratch_registers =
- (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
- (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
+ (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
+ (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
// We can reuse the declaring class (if present) and offset temporary.
if (temps_needed > old_temp_count) {
locations->AddRegisterTemps(temps_needed - old_temp_count);
}
} else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
- !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
- !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
+ !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
+ !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
// Allocate a normal temporary for store result in the non-native byte order path
// because scratch registers are used by the byte-swapped `expected` and `new_value`.
@@ -5002,7 +5085,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
locations->AddTemp(Location::RequiresRegister());
}
}
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// Add a temporary for the `old_value_temp` in slow path.
locations->AddTemp(Location::RequiresRegister());
}
@@ -5068,7 +5151,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// except for references that need the offset for the read barrier.
UseScratchRegisterScope temps(masm);
Register tmp_ptr = target.offset.X();
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
tmp_ptr = temps.AcquireX();
}
__ Add(tmp_ptr, target.object.X(), target.offset.X());
@@ -5151,7 +5234,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
vixl::aarch64::Label* exit_loop = &exit_loop_label;
vixl::aarch64::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
@@ -5296,7 +5379,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
@@ -5316,7 +5399,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
// We can reuse the declaring class temporary if present.
if (old_temp_count == 1u &&
- !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
// Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
locations->AddTemp(Location::RequiresRegister());
}
@@ -5327,7 +5410,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
if (old_temp_count == 1u &&
(get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
- !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
DataType::Type value_type =
GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
@@ -5372,7 +5455,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// except for references that need the offset for the non-Baker read barrier.
UseScratchRegisterScope temps(masm);
Register tmp_ptr = target.offset.X();
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
tmp_ptr = temps.AcquireX();
}
@@ -5402,7 +5485,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// the new value unless it is zero bit pattern (+0.0f or +0.0) and need another one
// in GenerateGetAndUpdate(). We have allocated a normal temporary to handle that.
old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
- } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
+ } else if ((gUseReadBarrier && kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// Load the old value initially to a scratch register.
// We shall move it to `out` later with a read barrier.
@@ -5450,7 +5533,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
__ Sxtb(out.W(), old_value.W());
} else if (value_type == DataType::Type::kInt16) {
__ Sxth(out.W(), old_value.W());
- } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
} else {
@@ -5647,7 +5730,7 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
// Byte order check. For native byte order return to the main path.
if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
- IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
// There is no reason to differentiate between native byte order and byte-swap
// for setting a zero bit pattern. Just return to the main path.
__ B(GetNativeByteOrderLabel());
@@ -5677,42 +5760,9 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
__ B(GetExitLabel());
}
-UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyByte);
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyInt);
-
-// 1.8.
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(ARM64)
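The long hand-maintained UNIMPLEMENTED_INTRINSIC runs deleted above are replaced by a single expansion of a per-architecture list macro. A minimal, self-contained sketch of the underlying X-macro pattern, with made-up entries (the real lists are defined elsewhere in the intrinsics headers):

// Stand-in for ART's real UNIMPLEMENTED_INTRINSIC so the sketch is
// self-contained; in ART it roughly expands to empty Visit##Name() methods on
// the per-architecture locations builder and code generator.
#define UNIMPLEMENTED_INTRINSIC(Arch, Name) /* no code generated for Name on Arch */

// Hypothetical list macro: each entry is handed to the callback V.
#define EXAMPLE_UNIMPLEMENTED_LIST(V) \
  V(StringBufferAppend)               \
  V(StringBufferLength)               \
  V(MethodHandleInvoke)

// Expanding the list with a callback that marks every entry unimplemented for
// one architecture -- the same shape as the MARK_UNIMPLEMENTED pattern above.
#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name)
EXAMPLE_UNIMPLEMENTED_LIST(MARK_UNIMPLEMENTED)
#undef MARK_UNIMPLEMENTED
#undef EXAMPLE_UNIMPLEMENTED_LIST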
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 9c46efddec..a0ccf87f7b 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_
+#include "base/macros.h"
#include "intrinsics.h"
namespace vixl {
@@ -27,7 +28,7 @@ class MacroAssembler;
} // namespace aarch64
} // namespace vixl
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class HInvokeStaticOrDirect;
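Each header touched in this diff gains an #include of "base/macros.h" and switches from `namespace art` to `namespace art HIDDEN`. Presumably HIDDEN is a symbol-visibility attribute defined in that header; a self-contained sketch of the idea, not ART's actual definition:

// Sketch of a HIDDEN-style visibility macro; an assumption, not the real one.
#if defined(__GNUC__) || defined(__clang__)
#define HIDDEN __attribute__((visibility("hidden")))
#else
#define HIDDEN
#endif

namespace art HIDDEN {
// Declarations here default to hidden visibility, so they are not exported
// from the shared library unless explicitly marked otherwise.
class ArenaAllocator;
}  // namespace art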
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index d850cadc2b..266b5bc799 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -34,7 +34,7 @@
#include "aarch32/constants-aarch32.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
#define __ assembler->GetVIXLAssembler()->
@@ -120,7 +120,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
: SlowPathCodeARMVIXL(instruction) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -1242,7 +1242,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo
void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1265,7 +1265,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Temporary register IP cannot be used in
// ReadBarrierSystemArrayCopySlowPathARM (because that register
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
@@ -1339,7 +1339,7 @@ static void CheckPosition(ArmVIXLAssembler* assembler,
void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
ArmVIXLAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1453,7 +1453,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -1584,7 +1584,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
@@ -1621,7 +1621,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
__ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -1723,7 +1723,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -2511,7 +2511,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
@@ -2539,7 +2539,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
RegisterFrom(obj),
@@ -2587,7 +2587,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
assembler->MaybeUnpoisonHeapReference(tmp);
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
vixl32::Label calculate_result;
@@ -2613,7 +2613,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
__ Bind(&calculate_result);
} else {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!gUseReadBarrier);
__ Sub(out, tmp, other);
}
@@ -2732,7 +2732,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke,
}
break;
case DataType::Type::kReference:
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Piggy-back on the field load path using introspection for the Baker read barrier.
vixl32::Register temp = RegisterFrom(maybe_temp);
__ Add(temp, base, offset);
@@ -2777,7 +2777,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke,
codegen->GenerateMemoryBarrier(
seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
}
- if (type == DataType::Type::kReference && !(kEmitCompilerReadBarrier && kUseBakerReadBarrier)) {
+ if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) {
Location base_loc = LocationFrom(base);
Location index_loc = LocationFrom(offset);
codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
@@ -2802,7 +2802,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
DataType::Type type,
bool atomic) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
@@ -2818,7 +2818,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke,
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
(can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
- if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
+ if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
(type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
@@ -2837,7 +2837,7 @@ static void GenUnsafeGet(HInvoke* invoke,
vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
Location out = locations->Out();
Location maybe_temp = Location::NoLocation();
- if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
+ if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
(type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
maybe_temp = locations->GetTemp(0);
}
@@ -3470,7 +3470,7 @@ static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
// branch goes to the read barrier slow path that clobbers `success` anyway.
bool init_failure_for_cmp =
success.IsValid() &&
- !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
+ !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
// Instruction scheduling: Loading a constant between LDREX* and using the loaded value
// is essentially free, so prepare the failure value here if we can.
bool init_failure_for_cmp_early =
@@ -3655,7 +3655,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
};
static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke);
+ const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -3706,7 +3706,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMV
vixl32::Label* exit_loop = &exit_loop_label;
vixl32::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
+ if (gUseReadBarrier && type == DataType::Type::kReference) {
// If marking, check if the stored reference is a from-space reference to the same
// object as the to-space reference `expected`. If so, perform a custom CAS loop.
ReadBarrierCasSlowPathARMVIXL* slow_path =
@@ -3770,7 +3770,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* i
}
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -3798,7 +3798,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invo
}
void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
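Several assertions in this file change from DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier) to the gUseReadBarrier form. DCHECK_IMPLIES(p, q) is a debug-only logical implication: it fires only when p holds and q does not. A stand-alone equivalent, assuming the usual definition:

#include <cassert>

// Debug-only implication check: "if read barriers are enabled, they must be
// the Baker variant". Equivalent to assert(!p || q).
#define EXAMPLE_DCHECK_IMPLIES(p, q) assert(!(p) || (q))

void CheckReadBarrierConfig(bool use_read_barrier, bool use_baker) {
  EXAMPLE_DCHECK_IMPLIES(use_read_barrier, use_baker);
}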
@@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
LocationFrom(target.object),
method,
ArtField::DeclaringClassOffset().Int32Value(),
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
}
}
} else {
@@ -4403,7 +4403,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
}
// Add a temporary for offset.
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
// To preserve the offset value across the non-Baker read barrier slow path
// for loading the declaring class, use a fixed callee-save register.
@@ -4428,7 +4428,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke,
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
@@ -4476,7 +4476,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
Location maybe_temp = Location::NoLocation();
Location maybe_temp2 = Location::NoLocation();
Location maybe_temp3 = Location::NoLocation();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
+ if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
// Reuse the offset temporary.
maybe_temp = LocationFrom(target.offset);
} else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
@@ -4590,7 +4590,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke,
HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
bool has_reverse_bytes_slow_path =
(expected_coordinates_count == 2u) &&
- !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern());
+ !IsZeroBitPattern(arg);
if (Use64BitExclusiveLoadStore(atomic, codegen)) {
// We need 4 temporaries in the byte array view slow path. Otherwise, we need
// 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
@@ -4699,7 +4699,7 @@ static void GenerateVarHandleSet(HInvoke* invoke,
vixl32::Register temp = target.offset;
vixl32::Register card = temps.Acquire();
vixl32::Register value_reg = RegisterFrom(value);
- codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true);
+ codegen->MarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
}
if (slow_path != nullptr) {
@@ -4749,7 +4749,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field. This breaks the read barriers
@@ -4763,7 +4763,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
// We need callee-save registers for both the class object and offset instead of
// the temporaries reserved in CreateVarHandleCommonLocations().
static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
@@ -4799,7 +4799,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
locations->AddRegisterTemps(2u);
}
}
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// Add a temporary for store result, also used for the `old_value_temp` in slow path.
locations->AddTemp(Location::RequiresRegister());
}
@@ -4930,7 +4930,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
vixl32::Label* exit_loop = &exit_loop_label;
vixl32::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
@@ -5086,7 +5086,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
@@ -5107,7 +5107,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
// Add temps needed to do the GenerateGetAndUpdate() with core registers.
size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
- } else if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ } else if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// We need to preserve the declaring class (if present) and offset for read barrier
// slow paths, so we must use a separate temporary for the exclusive store result.
@@ -5213,7 +5213,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
if (byte_swap) {
GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
}
- } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
// Load the old value initially to a temporary register.
// We shall move it to `out` later with a read barrier.
@@ -5296,7 +5296,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
} else {
__ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
}
- } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out),
RegisterFrom(old_value));
@@ -5517,7 +5517,7 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in)
// Byte order check. For native byte order return to the main path.
if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
- if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) {
+ if (IsZeroBitPattern(arg)) {
// There is no reason to differentiate between native byte order and byte-swap
// for setting a zero bit pattern. Just return to the main path.
__ B(GetNativeByteOrderLabel());
@@ -5549,69 +5549,9 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in)
__ B(GetExitLabel());
}
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Compare)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Min)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Max)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh)
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyByte);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyInt);
-
-// 1.8.
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat)
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCASLong) // High register pressure.
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCompareAndSetLong)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(ARMVIXL)
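The open-coded `arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()` checks in the ARM and ARM64 files are replaced by calls to an IsZeroBitPattern(arg) helper. Its likely shape, inferred from the call sites in this diff (the body is assumed, not copied from the source):

// Assumed helper: true when the input is a constant whose bits are all zero
// (0, 0L, +0.0f, +0.0, null), which lets the intrinsics reuse the zero
// register and skip byte swapping for stores of a zero bit pattern.
static bool IsZeroBitPattern(HInstruction* instruction) {
  return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
}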
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 3103cec8f0..54475bcc7e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+#include "base/macros.h"
#include "intrinsics.h"
#include "utils/arm/assembler_arm_vixl.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 19f5e332a8..13cabdafed 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -29,7 +29,7 @@
#include "utils/assembler.h"
#include "utils/label.h"
-namespace art {
+namespace art HIDDEN {
// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 7d90aae984..d2072201f8 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -38,7 +38,7 @@
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
@@ -75,7 +75,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
public:
explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
: SlowPathCode(instruction) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -1699,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke,
case DataType::Type::kReference: {
Register output = output_loc.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -1757,7 +1757,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
HInvoke* invoke,
DataType::Type type,
bool is_volatile) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -2103,7 +2103,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
DataType::Type type,
HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier &&
+ const bool can_call = gUseReadBarrier &&
kUseBakerReadBarrier &&
IsUnsafeCASObject(invoke);
LocationSummary* locations =
@@ -2175,7 +2175,7 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2304,7 +2304,7 @@ static void GenReferenceCAS(HInvoke* invoke,
DCHECK_EQ(expected, EAX);
DCHECK_NE(temp, temp2);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -2391,7 +2391,7 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg
if (type == DataType::Type::kReference) {
// The only read barrier implementation supporting the
// UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
Register temp = locations->GetTemp(0).AsRegister<Register>();
Register temp2 = locations->GetTemp(1).AsRegister<Register>();
@@ -2413,7 +2413,7 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
// The only read barrier implementation supporting the
// UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -2443,7 +2443,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke)
void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -2843,7 +2843,7 @@ static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2875,7 +2875,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2995,7 +2995,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// slow path.
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -3022,7 +3022,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
if (length.Equals(Location::RegisterLocation(temp3))) {
// When Baker read barriers are enabled, register `temp3`,
// which in the present case contains the `length` parameter,
@@ -3120,7 +3120,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -3151,7 +3151,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// Compute the base source address in `temp1`.
GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// If it is needed (in the case of the fast-path loop), the base
// destination address is computed later, as `temp2` is used for
// intermediate computations.
@@ -3259,7 +3259,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -3377,7 +3377,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
__ fs()->cmpl(Address::Absolute(offset),
@@ -3400,7 +3400,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
obj.AsRegister<Register>(),
@@ -3442,7 +3442,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
NearLabel end, return_true, return_false;
__ cmpl(out, other);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
__ j(kEqual, &return_true);
@@ -3781,7 +3781,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
Location::RegisterLocation(temp),
Address(temp, declaring_class_offset),
/* fixup_label= */ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
return temp;
}
@@ -3794,7 +3794,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
static void CreateVarHandleGetLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -3836,7 +3836,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) {
static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -3860,7 +3860,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
Address field_addr(ref, offset, TIMES_1, 0);
// Load the value from the field
- if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) {
+ if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, ref, field_addr, /* needs_null_check= */ false);
} else if (type == DataType::Type::kInt64 &&
@@ -3917,7 +3917,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
static void CreateVarHandleSetLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -3963,7 +3963,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) {
case DataType::Type::kInt64:
// We only handle constant non-atomic int64 values.
DCHECK(value->IsConstant());
- locations->SetInAt(value_index, Location::ConstantLocation(value->AsConstant()));
+ locations->SetInAt(value_index, Location::ConstantLocation(value));
break;
case DataType::Type::kReference:
locations->SetInAt(value_index, Location::RequiresRegister());
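The Location::ConstantLocation() call sites in this diff drop the explicit ->AsConstant() cast, which suggests the factory now accepts the instruction directly and checks it itself. A hypothetical shape under that assumption; signature and body are not copied from the source:

// Hypothetical delegating overload, for illustration only.
static Location ConstantLocation(HInstruction* instruction) {
  DCHECK(instruction->IsConstant());  // only constants may be placed here
  return Location::ConstantLocation(instruction->AsConstant());  // existing HConstant* overload
}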
@@ -3990,7 +3990,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) {
static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4041,13 +4041,16 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
InstructionCodeGeneratorX86* instr_codegen =
down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
// Store the value to the field
- instr_codegen->HandleFieldSet(invoke,
- value_index,
- value_type,
- Address(reference, offset, TIMES_1, 0),
- reference,
- is_volatile,
- /* value_can_be_null */ true);
+ instr_codegen->HandleFieldSet(
+ invoke,
+ value_index,
+ value_type,
+ Address(reference, offset, TIMES_1, 0),
+ reference,
+ is_volatile,
+ /* value_can_be_null */ true,
+ // Value can be null, and this write barrier is not being relied on for other sets.
+ WriteBarrierKind::kEmitWithNullCheck);
__ Bind(slow_path->GetExitLabel());
}
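The reformatted HandleFieldSet call above gains an explicit write-barrier policy argument alongside value_can_be_null. Only kEmitWithNullCheck is visible in this hunk; the enum sketched below is hypothetical and merely illustrates the kind of policy such a parameter can carry:

// Hypothetical enumeration -- only kEmitWithNullCheck appears in this diff;
// the other values and their names are invented for illustration.
enum class WriteBarrierKind {
  kEmitWithNullCheck,  // mark the GC card, but skip it when the stored value is null
  kEmitNoNullCheck,    // mark the GC card unconditionally
  kDontEmit,           // caller proved no reference store needs tracking
};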
@@ -4087,7 +4090,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4135,7 +4138,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4194,7 +4197,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
__ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
break;
case DataType::Type::kReference: {
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -4208,7 +4211,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
&temp2);
}
codegen->MarkGCCard(
- temp, temp2, reference, value.AsRegister<Register>(), /* value_can_be_null= */ false);
+ temp, temp2, reference, value.AsRegister<Register>(), /* emit_null_check= */ false);
if (kPoisonHeapReferences) {
__ movl(temp, value.AsRegister<Register>());
__ PoisonHeapReference(temp);
@@ -4258,7 +4261,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke)
static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4322,7 +4325,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4441,7 +4444,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke*
static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4490,7 +4493,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4591,7 +4594,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke)
static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4659,7 +4662,7 @@ static void GenerateBitwiseOp(HInvoke* invoke,
static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4829,64 +4832,9 @@ void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
}
}
-UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned)
-UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
-UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
-UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
-UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
-UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
-UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
-UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Compare)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Min)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Max)
-UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh)
-
-UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
-
-// 1.8.
-
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetObject)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(X86)
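The MarkGCCard call sites switch their parameter comment from value_can_be_null to emit_null_check, naming what the callee actually does with the flag. A conceptual, simplified card-marking helper to show the difference; this is not the ART implementation, and the card table is reduced to a plain byte array:

#include <cstdint>

// Simplified sketch: dirty the card that covers `object` after a reference
// was stored into it. When emit_null_check is true, a null stored value skips
// the card mark -- the behaviour the renamed flag controls at the call sites.
void MarkGCCardSketch(uint8_t* card_table, unsigned card_shift,
                      const void* object, const void* stored_value,
                      bool emit_null_check) {
  if (emit_null_check && stored_value == nullptr) {
    return;  // Nothing for the GC to trace, so no card to dirty.
  }
  card_table[reinterpret_cast<uintptr_t>(object) >> card_shift] = 1;  // placeholder dirty value
}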
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index ae150dad43..77c236d244 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
+#include "base/macros.h"
#include "intrinsics.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class HInvokeStaticOrDirect;
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 3c31374f67..9d0d5f155e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -36,7 +36,7 @@
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
@@ -71,7 +71,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
public:
explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
: SlowPathCode(instruction) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -836,7 +836,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -887,7 +887,7 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1002,7 +1002,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// slow path.
bool did_unpoison = false;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
@@ -1034,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
@@ -1055,7 +1055,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// For the same reason given earlier, `temp1` is not trashed by the
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ TMP = temp2->component_type_
@@ -1081,7 +1081,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
@@ -1109,7 +1109,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -1141,7 +1141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
GenSystemArrayCopyAddresses(
GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// SystemArrayCopy implementation for Baker read barriers (see
// also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
//
@@ -1224,7 +1224,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -1888,7 +1888,7 @@ static void GenUnsafeGet(HInvoke* invoke,
break;
case DataType::Type::kReference: {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -1930,7 +1930,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -2230,7 +2230,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke)
static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
DataType::Type type,
HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier &&
+ const bool can_call = gUseReadBarrier &&
kUseBakerReadBarrier &&
IsUnsafeCASObject(invoke);
LocationSummary* locations =
@@ -2253,7 +2253,7 @@ static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -2298,7 +2298,7 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* i
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barrier.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2438,7 +2438,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
CpuRegister temp3,
bool is_cmpxchg) {
// The only supported read barrier implementation is the Baker-style read barrier.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
@@ -2447,7 +2447,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
Address field_addr(base, offset, TIMES_1, 0);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -2556,7 +2556,7 @@ static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
- CpuRegister temp3 = kEmitCompilerReadBarrier
+ CpuRegister temp3 = gUseReadBarrier
? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
: CpuRegister(kNoRegister);
DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
@@ -2624,7 +2624,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barrier.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -3128,7 +3128,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
__ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
@@ -3150,7 +3150,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
obj.AsRegister<CpuRegister>(),
@@ -3191,7 +3191,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
__ cmpl(out, other);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
NearLabel calculate_result;
@@ -3771,7 +3771,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
Location::RegisterLocation(target.object),
Address(method, ArtField::DeclaringClassOffset()),
/*fixup_label=*/ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
}
}
} else {
@@ -3790,7 +3790,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barrier.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return false;
}
@@ -3876,7 +3876,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
Location out = locations->Out();
if (type == DataType::Type::kReference) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
@@ -3985,16 +3985,19 @@ static void GenerateVarHandleSet(HInvoke* invoke,
Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
// Store the value to the field.
- codegen->GetInstructionCodegen()->HandleFieldSet(invoke,
- value_index,
- last_temp_index,
- value_type,
- dst,
- CpuRegister(target.object),
- is_volatile,
- is_atomic,
- /*value_can_be_null=*/ true,
- byte_swap);
+ codegen->GetInstructionCodegen()->HandleFieldSet(
+ invoke,
+ value_index,
+ last_temp_index,
+ value_type,
+ dst,
+ CpuRegister(target.object),
+ is_volatile,
+ is_atomic,
+ /*value_can_be_null=*/true,
+ byte_swap,
+ // Value can be null, and this write barrier is not being relied on for other sets.
+ WriteBarrierKind::kEmitWithNullCheck);
// setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
@@ -4070,7 +4073,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -4085,7 +4088,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
bool is_cmpxchg,
bool byte_swap = false) {
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4218,7 +4221,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -4267,7 +4270,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
CpuRegister valreg = value.AsRegister<CpuRegister>();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke,
locations->GetTemp(temp_count - 3),
@@ -4278,7 +4281,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
&temp1,
&temp2);
}
- codegen->MarkGCCard(temp1, temp2, ref, valreg, /*value_can_be_null=*/ false);
+ codegen->MarkGCCard(temp1, temp2, ref, valreg, /* emit_null_check= */ false);
DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
if (kPoisonHeapReferences) {
@@ -4647,7 +4650,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
bool need_any_store_barrier,
bool need_any_any_barrier,
bool byte_swap = false) {
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4987,57 +4990,9 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen
__ jmp(GetExitLabel());
}
-UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
-UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
-UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Compare)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Min)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Max)
-
-UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
-
-// 1.8.
-
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetObject)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(X86_64)
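
The removed block above is the old hand-maintained list of intrinsics without an x86-64 fast path; it is folded into a single X-macro expansion, where UNIMPLEMENTED_INTRINSIC_LIST_X86_64 enumerates the names and MARK_UNIMPLEMENTED stamps out one UNIMPLEMENTED_INTRINSIC(X86_64, Name) per entry. A minimal, self-contained sketch of how such a list macro works follows; the list name and its consumer below are invented for illustration, and only the entry names come from the removed list.

#include <cstdio>

// Illustrative X-macro list; the real UNIMPLEMENTED_INTRINSIC_LIST_X86_64
// lives in an ART header and enumerates the intrinsics without a fast path.
#define SKETCH_UNIMPLEMENTED_LIST(V) \
  V(CRC32Update)                     \
  V(FP16ToFloat)                     \
  V(MethodHandleInvokeExact)

// Consumer macro: expands to one statement per list entry.
#define MARK_UNIMPLEMENTED(Name) std::puts("unimplemented: " #Name);

int main() {
  SKETCH_UNIMPLEMENTED_LIST(MARK_UNIMPLEMENTED)
  return 0;
}

#undef MARK_UNIMPLEMENTED
#undef SKETCH_UNIMPLEMENTED_LIST
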
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 199cfede1a..59fe815a94 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+#include "base/macros.h"
#include "intrinsics.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class HInvokeStaticOrDirect;
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 0edb23b857..0c791b640d 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -18,7 +18,7 @@
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
static bool IsPhiOf(HInstruction* instruction, HBasicBlock* block) {
return instruction->IsPhi() && instruction->GetBlock() == block;
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 9cafddb05a..1a86b6eb9f 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_LICM_H_
#define ART_COMPILER_OPTIMIZING_LICM_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index adc3cabe87..f8481099f4 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -17,12 +17,13 @@
#include "licm.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the LICM tests.
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc
index 58e00a810d..25ca866b2c 100644
--- a/compiler/optimizing/linear_order.cc
+++ b/compiler/optimizing/linear_order.cc
@@ -19,7 +19,7 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
-namespace art {
+namespace art HIDDEN {
static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) {
return first_loop == second_loop;
diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h
index 151db001e1..75e75048a3 100644
--- a/compiler/optimizing/linear_order.h
+++ b/compiler/optimizing/linear_order.h
@@ -19,9 +19,10 @@
#include <type_traits>
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void LinearizeGraphInternal(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index d56ae11ca9..01daa23511 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -17,6 +17,7 @@
#include <fstream>
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "dex/dex_file.h"
@@ -28,9 +29,9 @@
#include "pretty_printer.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
-class LinearizeTest : public OptimizingUnitTest {
+class LinearizeTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
template <size_t number_of_blocks>
void TestCode(const std::vector<uint16_t>& data,
diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc
index c60386d7b7..b5d1336d4a 100644
--- a/compiler/optimizing/live_interval_test.cc
+++ b/compiler/optimizing/live_interval_test.cc
@@ -15,12 +15,13 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "optimizing_unit_test.h"
#include "ssa_liveness_analysis.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
TEST(LiveInterval, GetStart) {
ArenaPoolAndAllocator pool;
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index bb8a4dc08e..fb1a23eef4 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "dex/dex_file.h"
@@ -25,9 +26,9 @@
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
-class LiveRangesTest : public OptimizingUnitTest {
+class LiveRangesTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
HGraph* BuildGraph(const std::vector<uint16_t>& data);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index ba3787e9be..0b421cf9e6 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "dex/dex_file.h"
@@ -25,9 +26,9 @@
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
-class LivenessTest : public OptimizingUnitTest {
+class LivenessTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index 3fe42aff2e..f1c50ac03c 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -19,7 +19,7 @@
#include "base/scoped_arena_allocator.h"
#include "optimizing/escape.h"
-namespace art {
+namespace art HIDDEN {
// A cap for the number of heap locations to prevent pathological time/space consumption.
// The number of heap locations for most of the methods stays below this threshold.
@@ -283,14 +283,6 @@ bool LoadStoreAnalysis::Run() {
heap_location_collector_.CleanUp();
return false;
}
- if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) {
- // Don't do load/store elimination if the method has volatile field accesses or
- // monitor operations, for now.
- // TODO: do it right.
- heap_location_collector_.CleanUp();
- return false;
- }
-
heap_location_collector_.BuildAliasingMatrix();
heap_location_collector_.DumpReferenceStats(stats_);
return true;
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 4975bae2a2..c46a5b9cc1 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -20,6 +20,7 @@
#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
@@ -28,7 +29,7 @@
#include "nodes.h"
#include "optimizing/optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
enum class LoadStoreAnalysisType {
kBasic,
@@ -170,14 +171,16 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
size_t offset,
HInstruction* index,
size_t vector_length,
- int16_t declaring_class_def_index)
+ int16_t declaring_class_def_index,
+ bool is_vec_op)
: ref_info_(ref_info),
type_(DataType::ToSigned(type)),
offset_(offset),
index_(index),
vector_length_(vector_length),
declaring_class_def_index_(declaring_class_def_index),
- has_aliased_locations_(false) {
+ has_aliased_locations_(false),
+ is_vec_op_(is_vec_op) {
DCHECK(ref_info != nullptr);
DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
(offset != kInvalidFieldOffset && index == nullptr));
@@ -188,6 +191,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
size_t GetOffset() const { return offset_; }
HInstruction* GetIndex() const { return index_; }
size_t GetVectorLength() const { return vector_length_; }
+ bool IsVecOp() const { return is_vec_op_; }
// Returns the definition of declaring class' dex index.
// It's kDeclaringClassDefIndexForArrays for an array element.
@@ -226,11 +230,12 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
// Declaring class's def's dex index.
// Invalid when this HeapLocation is not field access.
const int16_t declaring_class_def_index_;
-
// Has aliased heap locations in the method, because either the
// reference is aliased or the array element is aliased via different
// index names.
bool has_aliased_locations_;
+ // Whether this HeapLocation represents a vector operation.
+ bool is_vec_op_;
DISALLOW_COPY_AND_ASSIGN(HeapLocation);
};
@@ -253,8 +258,6 @@ class HeapLocationCollector : public HGraphVisitor {
heap_locations_(allocator->Adapter(kArenaAllocLSA)),
aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA),
has_heap_stores_(false),
- has_volatile_(false),
- has_monitor_operations_(false),
lse_type_(lse_type) {
aliasing_matrix_.ClearAllBits();
}
@@ -319,7 +322,8 @@ class HeapLocationCollector : public HGraphVisitor {
field->GetFieldOffset().SizeValue(),
nullptr,
HeapLocation::kScalar,
- field->GetDeclaringClassDefIndex());
+ field->GetDeclaringClassDefIndex(),
+ /*is_vec_op=*/false);
}
size_t GetArrayHeapLocation(HInstruction* instruction) const {
@@ -328,10 +332,10 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetType();
size_t vector_length = HeapLocation::kScalar;
+ const bool is_vec_op = instruction->IsVecStore() || instruction->IsVecLoad();
if (instruction->IsArraySet()) {
type = instruction->AsArraySet()->GetComponentType();
- } else if (instruction->IsVecStore() ||
- instruction->IsVecLoad()) {
+ } else if (is_vec_op) {
HVecOperation* vec_op = instruction->AsVecOperation();
type = vec_op->GetPackedType();
vector_length = vec_op->GetVectorLength();
@@ -343,21 +347,14 @@ class HeapLocationCollector : public HGraphVisitor {
HeapLocation::kInvalidFieldOffset,
index,
vector_length,
- HeapLocation::kDeclaringClassDefIndexForArrays);
+ HeapLocation::kDeclaringClassDefIndexForArrays,
+ is_vec_op);
}
bool HasHeapStores() const {
return has_heap_stores_;
}
- bool HasVolatile() const {
- return has_volatile_;
- }
-
- bool HasMonitorOps() const {
- return has_monitor_operations_;
- }
-
// Find and return the heap location index in heap_locations_.
// NOTE: When heap locations are created, potentially aliasing/overlapping
// accesses are given different indexes. This find function also
@@ -373,7 +370,8 @@ class HeapLocationCollector : public HGraphVisitor {
size_t offset,
HInstruction* index,
size_t vector_length,
- int16_t declaring_class_def_index) const {
+ int16_t declaring_class_def_index,
+ bool is_vec_op) const {
DataType::Type lookup_type = DataType::ToSigned(type);
for (size_t i = 0; i < heap_locations_.size(); i++) {
HeapLocation* loc = heap_locations_[i];
@@ -382,7 +380,8 @@ class HeapLocationCollector : public HGraphVisitor {
loc->GetOffset() == offset &&
loc->GetIndex() == index &&
loc->GetVectorLength() == vector_length &&
- loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
+ loc->GetDeclaringClassDefIndex() == declaring_class_def_index &&
+ loc->IsVecOp() == is_vec_op) {
return i;
}
}
@@ -527,22 +526,20 @@ class HeapLocationCollector : public HGraphVisitor {
size_t offset,
HInstruction* index,
size_t vector_length,
- int16_t declaring_class_def_index) {
+ int16_t declaring_class_def_index,
+ bool is_vec_op) {
HInstruction* original_ref = HuntForOriginalReference(ref);
ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
size_t heap_location_idx = FindHeapLocationIndex(
- ref_info, type, offset, index, vector_length, declaring_class_def_index);
+ ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op);
if (heap_location_idx == kHeapLocationNotFound) {
- HeapLocation* heap_loc = new (allocator_)
- HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index);
+ HeapLocation* heap_loc = new (allocator_) HeapLocation(
+ ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op);
heap_locations_.push_back(heap_loc);
}
}
void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
- if (field_info.IsVolatile()) {
- has_volatile_ = true;
- }
DataType::Type type = field_info.GetFieldType();
const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
const size_t offset = field_info.GetFieldOffset().SizeValue();
@@ -551,19 +548,22 @@ class HeapLocationCollector : public HGraphVisitor {
offset,
nullptr,
HeapLocation::kScalar,
- declaring_class_def_index);
+ declaring_class_def_index,
+ /*is_vec_op=*/false);
}
void VisitArrayAccess(HInstruction* array,
HInstruction* index,
DataType::Type type,
- size_t vector_length) {
+ size_t vector_length,
+ bool is_vec_op) {
MaybeCreateHeapLocation(array,
type,
HeapLocation::kInvalidFieldOffset,
index,
vector_length,
- HeapLocation::kDeclaringClassDefIndexForArrays);
+ HeapLocation::kDeclaringClassDefIndexForArrays,
+ is_vec_op);
}
void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
@@ -597,7 +597,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetType();
- VisitArrayAccess(array, index, type, HeapLocation::kScalar);
+ VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false);
CreateReferenceInfoForReferenceType(instruction);
}
@@ -605,7 +605,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetComponentType();
- VisitArrayAccess(array, index, type, HeapLocation::kScalar);
+ VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false);
has_heap_stores_ = true;
}
@@ -613,7 +613,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
- VisitArrayAccess(array, index, type, instruction->GetVectorLength());
+ VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true);
CreateReferenceInfoForReferenceType(instruction);
}
@@ -621,7 +621,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
- VisitArrayAccess(array, index, type, instruction->GetVectorLength());
+ VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true);
has_heap_stores_ = true;
}
@@ -637,18 +637,12 @@ class HeapLocationCollector : public HGraphVisitor {
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override {
- has_monitor_operations_ = true;
- }
-
ScopedArenaAllocator* allocator_;
ScopedArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses.
ScopedArenaVector<HeapLocation*> heap_locations_; // All heap locations.
ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations.
bool has_heap_stores_; // If there are no heap stores, LSE acts as GVN with better
// alias analysis and won't be as effective.
- bool has_volatile_; // If there are volatile field accesses.
- bool has_monitor_operations_; // If there are monitor operations.
LoadStoreAnalysisType lse_type_;
DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
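
With the header changes above, is_vec_op becomes part of a HeapLocation's identity: FindHeapLocationIndex only reports a match when the flag agrees, so a scalar access and a vector access to the same array element get distinct heap-location indexes. Below is a stripped-down model of that lookup rule; the types and fields are simplified stand-ins, not the ART classes.

#include <cassert>
#include <cstddef>
#include <vector>

// Simplified stand-in for HeapLocation: the lookup key now includes is_vec_op.
struct Loc {
  const void* ref;
  std::size_t offset;
  bool is_vec_op;
};

// Mirrors the FindHeapLocationIndex idea: two accesses share an index only if
// every component of the key, including the vector flag, is equal.
std::size_t Find(const std::vector<Loc>& locs,
                 const void* ref,
                 std::size_t offset,
                 bool is_vec_op) {
  for (std::size_t i = 0; i < locs.size(); ++i) {
    if (locs[i].ref == ref && locs[i].offset == offset && locs[i].is_vec_op == is_vec_op) {
      return i;
    }
  }
  return static_cast<std::size_t>(-1);  // not found
}

int main() {
  int array[4] = {0, 0, 0, 0};
  std::vector<Loc> locs = {{array, 0, /*is_vec_op=*/ false},
                           {array, 0, /*is_vec_op=*/ true}};
  // A scalar access and a vector access to the same element stay distinct.
  assert(Find(locs, array, 0, /*is_vec_op=*/ false) == 0u);
  assert(Find(locs, array, 0, /*is_vec_op=*/ true) == 1u);
  return 0;
}
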
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index 3c26c8d6ce..865febbd31 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "load_store_analysis.h"
#include <array>
@@ -36,7 +37,7 @@
#include "optimizing_unit_test.h"
#include "scoped_thread_state_change.h"
-namespace art {
+namespace art HIDDEN {
class LoadStoreAnalysisTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
public:
@@ -117,12 +118,13 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
size_t field = HeapLocation::kInvalidFieldOffset;
size_t vec = HeapLocation::kScalar;
size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
+ const bool is_vec_op = false;
size_t loc1 = heap_location_collector.FindHeapLocationIndex(
- ref, type, field, c1, vec, class_def);
+ ref, type, field, c1, vec, class_def, is_vec_op);
size_t loc2 = heap_location_collector.FindHeapLocationIndex(
- ref, type, field, c2, vec, class_def);
+ ref, type, field, c2, vec, class_def, is_vec_op);
size_t loc3 = heap_location_collector.FindHeapLocationIndex(
- ref, type, field, index, vec, class_def);
+ ref, type, field, index, vec, class_def, is_vec_op);
// must find this reference info for array in HeapLocationCollector.
ASSERT_TRUE(ref != nullptr);
// must find these heap locations;
@@ -142,7 +144,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
- EXPECT_TRUE(CheckGraph(graph_));
+ EXPECT_TRUE(CheckGraph());
}
TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
@@ -223,15 +225,14 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
// accesses to different fields of the same object should not alias.
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
- EXPECT_TRUE(CheckGraph(graph_));
+ EXPECT_TRUE(CheckGraph());
}
TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
CreateGraph();
- HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(entry);
- graph_->SetEntryBlock(entry);
- graph_->BuildDominatorTree();
+ AdjacencyListGraph blks(
+ SetupFromAdjacencyList("entry", "exit", {{"entry", "body"}, {"body", "exit"}}));
+ HBasicBlock* body = blks.Get("body");
HInstruction* array = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference);
@@ -261,23 +262,25 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
HInstruction* arr_set8 =
new (GetAllocator()) HArraySet(array, sub_neg1, c0, DataType::Type::kInt32, 0);
- entry->AddInstruction(array);
- entry->AddInstruction(index);
- entry->AddInstruction(add0);
- entry->AddInstruction(add1);
- entry->AddInstruction(sub0);
- entry->AddInstruction(sub1);
- entry->AddInstruction(sub_neg1);
- entry->AddInstruction(rev_sub1);
-
- entry->AddInstruction(arr_set1); // array[0] = c0
- entry->AddInstruction(arr_set2); // array[1] = c0
- entry->AddInstruction(arr_set3); // array[i+0] = c0
- entry->AddInstruction(arr_set4); // array[i+1] = c0
- entry->AddInstruction(arr_set5); // array[i-0] = c0
- entry->AddInstruction(arr_set6); // array[i-1] = c0
- entry->AddInstruction(arr_set7); // array[1-i] = c0
- entry->AddInstruction(arr_set8); // array[i-(-1)] = c0
+ body->AddInstruction(array);
+ body->AddInstruction(index);
+ body->AddInstruction(add0);
+ body->AddInstruction(add1);
+ body->AddInstruction(sub0);
+ body->AddInstruction(sub1);
+ body->AddInstruction(sub_neg1);
+ body->AddInstruction(rev_sub1);
+
+ body->AddInstruction(arr_set1); // array[0] = c0
+ body->AddInstruction(arr_set2); // array[1] = c0
+ body->AddInstruction(arr_set3); // array[i+0] = c0
+ body->AddInstruction(arr_set4); // array[i+1] = c0
+ body->AddInstruction(arr_set5); // array[i-0] = c0
+ body->AddInstruction(arr_set6); // array[i-1] = c0
+ body->AddInstruction(arr_set7); // array[1-i] = c0
+ body->AddInstruction(arr_set8); // array[i-(-1)] = c0
+
+ body->AddInstruction(new (GetAllocator()) HReturnVoid());
ScopedArenaAllocator allocator(graph_->GetArenaStack());
LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic);
@@ -317,7 +320,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
- EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(graph_));
+ EXPECT_TRUE(CheckGraph());
}
TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
@@ -891,7 +894,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1000,7 +1004,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1123,7 +1128,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1403,7 +1409,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) {
{},
InvokeType::kStatic,
{nullptr, 0},
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1504,7 +1511,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1615,7 +1623,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1631,7 +1640,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
c0,
nullptr,
@@ -1800,7 +1810,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
high_left->AddInstruction(call_left);
@@ -1856,7 +1867,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_low_left = new (GetAllocator()) HGoto();
call_low_left->AsInvoke()->SetRawInputAt(0, new_inst);
low_left->AddInstruction(call_low_left);
@@ -2013,7 +2025,8 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) {
{},
InvokeType::kStatic,
{nullptr, 0},
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left_merge = new (GetAllocator()) HGoto();
left_phi->SetRawInputAt(0, obj_param);
left_phi->SetRawInputAt(1, new_inst);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9b8f07e969..9cabb12a9f 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -319,7 +319,7 @@
* a hash map to the HeapLocationCollector.
*/
-namespace art {
+namespace art HIDDEN {
#define LSE_VLOG \
if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
@@ -855,25 +855,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
}
- // `instruction` is being removed. Try to see if the null check on it
- // can be removed. This can happen if the same value is set in two branches
- // but not in dominators. Such as:
- // int[] a = foo();
- // if () {
- // a[0] = 2;
- // } else {
- // a[0] = 2;
- // }
- // // a[0] can now be replaced with constant 2, and the null check on it can be removed.
- void TryRemovingNullCheck(HInstruction* instruction) {
- HInstruction* prev = instruction->GetPrevious();
- if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
- // Previous instruction is a null check for this instruction. Remove the null check.
- prev->ReplaceWith(prev->InputAt(0));
- prev->GetBlock()->RemoveInstruction(prev);
- }
- }
-
HInstruction* GetDefaultValue(DataType::Type type) {
switch (type) {
case DataType::Type::kReference:
@@ -993,13 +974,63 @@ class LSEVisitor final : private HGraphDelegateVisitor {
<< " but LSE should be the only source of predicated-ifield-gets!";
}
+ void HandleAcquireLoad(HInstruction* instruction) {
+ DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
+ (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
+ (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
+ << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();
+
+ // Acquire operations, e.g. MONITOR_ENTER, change the thread's view of memory, so we must
+ // invalidate all current values.
+ ScopedArenaVector<ValueRecord>& heap_values =
+ heap_values_for_[instruction->GetBlock()->GetBlockId()];
+ for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
+ KeepStores(heap_values[i].stored_by);
+ heap_values[i].stored_by = Value::PureUnknown();
+ heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
+ }
+
+ // Note that there's no need to record the load as subsequent acquire loads shouldn't be
+ // eliminated either.
+ }
+
+ void HandleReleaseStore(HInstruction* instruction) {
+ DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
+ (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
+ (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
+ << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();
+
+ // Release operations, e.g. MONITOR_EXIT, do not affect this thread's view of memory, but
+ // they make this thread's modifications visible to other threads. Therefore, we must keep
+ // the stores, but there's no need to clobber the values.
+ ScopedArenaVector<ValueRecord>& heap_values =
+ heap_values_for_[instruction->GetBlock()->GetBlockId()];
+ for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
+ KeepStores(heap_values[i].stored_by);
+ heap_values[i].stored_by = Value::PureUnknown();
+ }
+
+ // Note that there's no need to record the store as subsequent release stores shouldn't be
+ // eliminated either.
+ }
+
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleAcquireLoad(instruction);
+ return;
+ }
+
HInstruction* object = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
}
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleReleaseStore(instruction);
+ return;
+ }
+
HInstruction* object = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
HInstruction* value = instruction->InputAt(1);
@@ -1008,12 +1039,22 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleAcquireLoad(instruction);
+ return;
+ }
+
HInstruction* cls = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
}
void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleReleaseStore(instruction);
+ return;
+ }
+
HInstruction* cls = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
HInstruction* value = instruction->InputAt(1);
@@ -1021,6 +1062,14 @@ class LSEVisitor final : private HGraphDelegateVisitor {
VisitSetLocation(instruction, idx, value);
}
+ void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
+ if (monitor_op->IsEnter()) {
+ HandleAcquireLoad(monitor_op);
+ } else {
+ HandleReleaseStore(monitor_op);
+ }
+ }
+
void VisitArrayGet(HArrayGet* instruction) override {
VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
}
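
The handlers added above give LSE a small memory-ordering model: volatile loads and MONITOR_ENTER act as acquires, which keep earlier stores alive and invalidate every cached heap value, while volatile stores and MONITOR_EXIT act as releases, which only keep the stores. Below is a toy model of that asymmetry; the record type is a simplification invented for illustration, not LSE's ValueRecord.

#include <string>
#include <vector>

// Toy stand-in for LSE's per-location record.
struct ValueRecord {
  std::string value;  // what the pass currently believes the location holds
  bool store_kept;    // whether the store producing it must be preserved
};

// Acquire (volatile load, MONITOR_ENTER): another thread may have published
// writes, so keep prior stores and forget every cached value.
void HandleAcquire(std::vector<ValueRecord>& heap_values) {
  for (ValueRecord& rec : heap_values) {
    rec.store_kept = true;
    rec.value = "unknown";
  }
}

// Release (volatile store, MONITOR_EXIT): this thread's writes become visible
// to others, so prior stores must stay, but the cached values remain valid.
void HandleRelease(std::vector<ValueRecord>& heap_values) {
  for (ValueRecord& rec : heap_values) {
    rec.store_kept = true;
  }
}

int main() {
  std::vector<ValueRecord> heap_values = {{"c1", false}, {"c2", false}};
  HandleRelease(heap_values);  // values still usable for load elimination
  HandleAcquire(heap_values);  // values must be reloaded afterwards
  return heap_values[0].value == "unknown" ? 0 : 1;
}
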
@@ -1040,8 +1089,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitDeoptimize(HDeoptimize* instruction) override {
- // If we are in a try catch, even singletons are observable.
- const bool in_try_catch = instruction->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
HBasicBlock* block = instruction->GetBlock();
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
@@ -1053,7 +1102,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// for singletons that don't escape in the deoptimization environment.
bool observable = true;
ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
- if (!in_try_catch && info->IsSingleton()) {
+ if (!inside_a_try && info->IsSingleton()) {
HInstruction* reference = info->GetReference();
// Finalizable objects always escape.
const bool finalizable_object =
@@ -1099,10 +1148,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
void HandleThrowingInstruction(HInstruction* instruction) {
DCHECK(instruction->CanThrow());
- // If we are inside of a try catch, singletons can become visible since we may not exit the
- // method.
- HandleExit(instruction->GetBlock(),
- instruction->GetBlock()->GetTryCatchInformation() != nullptr);
+ // If we are inside of a try, singletons can become visible since we may not exit the method.
+ HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
}
void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
@@ -1137,6 +1184,14 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
}
+ void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
+ HandleThrowingInstruction(load_method_handle);
+ }
+
+ void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
+ HandleThrowingInstruction(load_method_type);
+ }
+
void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
HandleThrowingInstruction(sb_append);
}
@@ -1149,18 +1204,11 @@ class LSEVisitor final : private HGraphDelegateVisitor {
HandleThrowingInstruction(check_cast);
}
- void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
- if (monitor_op->CanThrow()) {
- HandleThrowingInstruction(monitor_op);
- }
- }
-
void HandleInvoke(HInstruction* instruction) {
// If `instruction` can throw we have to presume all stores are visible.
const bool can_throw = instruction->CanThrow();
- // If we are in a try catch, even singletons are observable.
- const bool can_throw_in_try_catch =
- can_throw && instruction->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
SideEffects side_effects = instruction->GetSideEffects();
ScopedArenaVector<ValueRecord>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
@@ -1186,7 +1234,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
return cohort.PrecedesBlock(blk);
});
};
- if (!can_throw_in_try_catch &&
+ if (!can_throw_inside_a_try &&
(ref_info->IsSingleton() ||
// partial and we aren't currently escaping and we haven't escaped yet.
(ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) {
@@ -1235,8 +1283,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitNewInstance(HNewInstance* new_instance) override {
- // If we are in a try catch, even singletons are observable.
- const bool in_try_catch = new_instance->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
if (ref_info == nullptr) {
// new_instance isn't used for field accesses. No need to process it.
@@ -1265,7 +1313,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
heap_values[i].stored_by = Value::PureUnknown();
}
- } else if (in_try_catch || IsEscapingObject(info, block, i)) {
+ } else if (inside_a_try || IsEscapingObject(info, block, i)) {
// Since NewInstance can throw, we presume all previous stores could be visible.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
@@ -1274,8 +1322,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitNewArray(HNewArray* new_array) override {
- // If we are in a try catch, even singletons are observable.
- const bool in_try_catch = new_array->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
if (ref_info == nullptr) {
// new_array isn't used for array accesses. No need to process it.
@@ -1300,7 +1348,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// Array elements are set to default heap values.
heap_values[i].value = Value::Default();
heap_values[i].stored_by = Value::PureUnknown();
- } else if (in_try_catch || IsEscapingObject(info, block, i)) {
+ } else if (inside_a_try || IsEscapingObject(info, block, i)) {
// Since NewArray can throw, we presume all previous stores could be visible.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
@@ -1704,8 +1752,7 @@ void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
DCHECK(heap_values.empty());
size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
- if (block->GetPredecessors().empty() || (block->GetTryCatchInformation() != nullptr &&
- block->GetTryCatchInformation()->IsCatchBlock())) {
+ if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
heap_values.resize(num_heap_locations,
{/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
@@ -1764,7 +1811,6 @@ static HInstruction* FindOrConstructNonLoopPhi(
if (type == DataType::Type::kReference) {
// Update reference type information. Pass invalid handles, these are not used for Phis.
ReferenceTypePropagation rtp_fixup(block->GetGraph(),
- Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
/* is_first_run= */ false);
rtp_fixup.Visit(phi);
@@ -1877,7 +1923,6 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
}
HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
AddRemovedLoad(instruction, heap_value);
- TryRemovingNullCheck(instruction);
}
}
@@ -2068,9 +2113,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
HInstruction* replacement = GetDefaultValue(type);
for (uint32_t phi_placeholder_index : visited.Indexes()) {
DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
- phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
+ HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
+ // We use both vector and non vector operations to analyze the information. However, we replace
+ // only non vector operations in this code path.
+ if (!hl->IsVecOp()) {
+ phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
+ }
}
- phi_placeholders_to_materialize->Subtract(&visited);
return true;
}
@@ -2125,9 +2176,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
DCHECK(replacement != nullptr);
for (uint32_t phi_placeholder_index : visited.Indexes()) {
DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
- phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
+ HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
+ // We use both vector and non-vector operations for the analysis. However, we replace
+ // only vector operations in this code path.
+ if (hl->IsVecOp()) {
+ phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
+ }
}
- phi_placeholders_to_materialize->Subtract(&visited);
return true;
}
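
Both replacement hunks drop the bulk Subtract(&visited) in favour of per-index clearing, so a placeholder only leaves the set of placeholders to materialize when it was actually replaced on that path (non-vector locations in the earlier hunk, vector locations in the one just above). Below is a minimal sketch of that selective clearing with a plain bitset; the sizes and indexes are invented for illustration.

#include <bitset>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Placeholders still waiting to be materialized (bits 0..4 set).
  std::bitset<8> to_materialize("00011111");
  // Indexes visited while searching for a common replacement value.
  std::vector<std::size_t> visited = {0, 1, 2, 3, 4};
  // Pretend placeholders 1 and 3 belong to vector heap locations.
  std::bitset<8> is_vec_op("00001010");

  // Old behaviour: every visited bit would be dropped at once.
  // New behaviour: clear only the placeholders actually replaced here,
  // i.e. the non-vector ones on this code path.
  for (std::size_t index : visited) {
    if (!is_vec_op.test(index)) {
      to_materialize.reset(index);
    }
  }
  std::cout << to_materialize << '\n';  // prints 00001010: vector placeholders remain
  return 0;
}
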
@@ -2352,7 +2409,6 @@ bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_inde
}
// Update reference type information. Pass invalid handles, these are not used for Phis.
ReferenceTypePropagation rtp_fixup(GetGraph(),
- Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
/* is_first_run= */ false);
rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
@@ -2639,7 +2695,6 @@ void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unk
record.value = local_heap_values[idx];
HInstruction* heap_value = local_heap_values[idx].GetInstruction();
AddRemovedLoad(load_or_store, heap_value);
- TryRemovingNullCheck(load_or_store);
}
}
}
@@ -2698,7 +2753,6 @@ void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
record.value = Replacement(record.value);
HInstruction* heap_value = record.value.GetInstruction();
AddRemovedLoad(load, heap_value);
- TryRemovingNullCheck(load);
}
}
}
@@ -3013,7 +3067,6 @@ class PartialLoadStoreEliminationHelper {
return;
}
ReferenceTypePropagation rtp_fixup(GetGraph(),
- Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
/* is_first_run= */ false);
rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
@@ -3333,7 +3386,7 @@ class PartialLoadStoreEliminationHelper {
ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
if (ins->GetType() == DataType::Type::kReference) {
// Reference info is the same
- new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo());
+ new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo());
}
// In this phase, substitute instructions are used only for the predicated get
// default values which are used only if the partial singleton did not escape,
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 6ad2eb2c51..42de803ebd 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc
index 02dc939878..1ee109980f 100644
--- a/compiler/optimizing/load_store_elimination_test.cc
+++ b/compiler/optimizing/load_store_elimination_test.cc
@@ -36,7 +36,9 @@
#include "optimizing_unit_test.h"
#include "scoped_thread_state_change.h"
-namespace art {
+namespace art HIDDEN {
+
+static constexpr bool kDebugLseTests = false;
#define CHECK_SUBROUTINE_FAILURE() \
do { \
@@ -54,12 +56,16 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest
void SetUp() override {
SuperTest::SetUp();
- gLogVerbosity.compiler = true;
+ if (kDebugLseTests) {
+ gLogVerbosity.compiler = true;
+ }
}
void TearDown() override {
SuperTest::TearDown();
- gLogVerbosity.compiler = false;
+ if (kDebugLseTests) {
+ gLogVerbosity.compiler = false;
+ }
}
void PerformLSE(bool with_partial = true) {
@@ -67,15 +73,40 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest
LoadStoreElimination lse(graph_, /*stats=*/nullptr);
lse.Run(with_partial);
std::ostringstream oss;
- EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(oss)) << oss.str();
+ EXPECT_TRUE(CheckGraph(oss)) << oss.str();
}
- void PerformLSEWithPartial() {
- PerformLSE(true);
+ void PerformLSEWithPartial(const AdjacencyListGraph& blks) {
+ // PerformLSE expects this to be empty.
+ graph_->ClearDominanceInformation();
+ if (kDebugLseTests) {
+ LOG(INFO) << "Pre LSE " << blks;
+ }
+ PerformLSE(/*with_partial=*/ true);
+ if (kDebugLseTests) {
+ LOG(INFO) << "Post LSE " << blks;
+ }
}
- void PerformLSENoPartial() {
- PerformLSE(false);
+ void PerformLSENoPartial(const AdjacencyListGraph& blks) {
+ // PerformLSE expects this to be empty.
+ graph_->ClearDominanceInformation();
+ if (kDebugLseTests) {
+ LOG(INFO) << "Pre LSE " << blks;
+ }
+ PerformLSE(/*with_partial=*/ false);
+ if (kDebugLseTests) {
+ LOG(INFO) << "Post LSE " << blks;
+ }
+ }
+
+ void PerformSimplifications(const AdjacencyListGraph& blks) {
+ InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
+ simp.Run();
+
+ if (kDebugLseTests) {
+ LOG(INFO) << "Post simplification " << blks;
+ }
}
// Create instructions shared among tests.
@@ -542,6 +573,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) {
AddVecStore(entry_block_, array_, j_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -557,6 +589,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) {
AddVecStore(entry_block_, array_, i_add1_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -601,6 +634,7 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) {
AddArraySet(entry_block_, array_, i_, c1);
HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(load1));
@@ -634,6 +668,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) {
// a[j] = 1;
HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(array_set));
@@ -671,6 +706,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) {
// a[j] = 0;
HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -709,6 +745,7 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) {
// x = a[j];
HInstruction* load = AddArrayGet(return_block_, array_, j_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -749,6 +786,7 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) {
// down: a[i,... i + 3] = [1,...1]
HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vstore2));
@@ -839,6 +877,7 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) {
HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2));
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore1));
@@ -894,7 +933,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects2) {
// loop:
// array2[i] = array[i]
// array[0] = 2
- HInstruction* store1 = AddArraySet(entry_block_, array_, c0, c2);
+ HInstruction* store1 = AddArraySet(pre_header_, array_, c0, c2);
HInstruction* load = AddArrayGet(loop_, array_, phi_);
HInstruction* store2 = AddArraySet(loop_, array2, phi_, load);
@@ -926,6 +965,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects)
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -949,6 +989,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) {
HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1025,6 +1066,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1055,6 +1097,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) {
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1086,6 +1129,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide
HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -1116,6 +1160,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) {
HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -2024,10 +2069,7 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_c1);
@@ -2174,9 +2216,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) {
HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
exit->AddInstruction(read_bottom);
exit->AddInstruction(return_exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom) << *read_bottom;
EXPECT_INS_RETAINED(write_right) << *write_right;
@@ -2266,9 +2307,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) {
HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
exit->AddInstruction(read_bottom);
exit->AddInstruction(return_exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_right_first);
@@ -2499,11 +2539,7 @@ TEST_F(LoadStoreEliminationTest, PartialPhiPropagation) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_);
@@ -2656,11 +2692,7 @@ TEST_P(OrderDependentTestGroup, PredicatedUse) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(call_left_left);
EXPECT_INS_REMOVED(read1);
@@ -2814,11 +2846,7 @@ TEST_P(OrderDependentTestGroup, PredicatedEnvUse) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst1;
HInstanceFieldSet* moved_set1;
@@ -2954,11 +2982,7 @@ TEST_P(OrderDependentTestGroup, FieldSetOrderEnv) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(write_entry1);
EXPECT_INS_REMOVED(write_entry2);
@@ -3115,11 +3139,7 @@ TEST_P(OrderDependentTestGroup, MaterializationMovedUse) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(new_inst1);
EXPECT_INS_REMOVED(new_inst2);
@@ -3205,11 +3225,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst = nullptr;
HInstanceFieldSet* moved_set = nullptr;
@@ -3320,11 +3336,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst = nullptr;
HInstanceFieldSet* moved_set = nullptr;
@@ -3497,11 +3509,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst = nullptr;
HInstanceFieldSet* moved_set = nullptr;
@@ -3639,11 +3647,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst;
HInstanceFieldSet* moved_set;
@@ -3746,11 +3750,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
// Each escaping switch path gets its own materialization block.
// Blocks:
@@ -3877,11 +3877,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_early);
EXPECT_EQ(return_early->InputAt(0), c0);
@@ -4013,11 +4009,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc5) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
// Normal LSE can get rid of these two.
EXPECT_INS_REMOVED(store_one);
@@ -4504,9 +4496,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(write_left_pre) << *write_left_pre;
EXPECT_INS_RETAINED(read_return) << *read_return;
@@ -4612,9 +4602,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_return);
EXPECT_INS_RETAINED(write_right);
@@ -4700,9 +4688,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved5) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_right);
@@ -4785,12 +4771,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSENoPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSENoPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -4829,8 +4810,9 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
CreateGraph(/*handles=*/&vshs);
AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
"exit",
- {{"entry", "critical_break"},
- {"entry", "partial"},
+ {{"entry", "first_block"},
+ {"first_block", "critical_break"},
+ {"first_block", "partial"},
{"partial", "merge"},
{"critical_break", "merge"},
{"merge", "left"},
@@ -4839,7 +4821,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
{"right", "breturn"},
{"breturn", "exit"}}));
#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
+ GET_BLOCK(first_block);
GET_BLOCK(merge);
GET_BLOCK(partial);
GET_BLOCK(critical_break);
@@ -4858,12 +4840,12 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
HInstruction* if_inst = new (GetAllocator()) HIf(cmp_instructions.cmp_);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- cmp_instructions.AddSetup(entry);
- entry->AddInstruction(cmp_instructions.cmp_);
- entry->AddInstruction(if_inst);
+ first_block->AddInstruction(cls);
+ first_block->AddInstruction(new_inst);
+ first_block->AddInstruction(write_entry);
+ cmp_instructions.AddSetup(first_block);
+ first_block->AddInstruction(cmp_instructions.cmp_);
+ first_block->AddInstruction(if_inst);
ManuallyBuildEnvFor(cls, {});
cmp_instructions.AddEnvironment(cls->GetEnvironment());
new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
@@ -4897,12 +4879,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
HPredicatedInstanceFieldGet* pred_get;
@@ -5026,11 +5003,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortBeforeEscape) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
HInstanceFieldSet* init_set =
@@ -5157,11 +5130,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
HInstanceFieldSet* init_set =
@@ -5290,12 +5259,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
std::vector<HInstanceFieldSet*> sets;
@@ -5424,12 +5388,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(write_bottom);
EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet());
@@ -5539,11 +5498,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(write_bottom);
EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom;
@@ -5627,11 +5582,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -5748,11 +5699,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom1);
EXPECT_INS_REMOVED(read_bottom2);
@@ -5901,11 +5848,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom1);
EXPECT_INS_REMOVED(read_bottom2);
@@ -6078,11 +6021,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(early_exit_left_read);
EXPECT_INS_REMOVED(early_exit_right_read);
@@ -6212,11 +6151,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(read_right);
@@ -6334,11 +6269,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(read_early_return);
@@ -6447,11 +6378,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -6585,11 +6512,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -6688,11 +6611,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoadDefaultValue) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_RETAINED(write_left);
@@ -6861,11 +6780,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7045,11 +6960,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7196,11 +7107,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7344,11 +7251,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7492,11 +7395,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7657,11 +7556,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7757,17 +7652,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_right);
EXPECT_INS_REMOVED(write_start);
@@ -7851,17 +7739,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_right);
EXPECT_INS_REMOVED(write_start);
@@ -7961,17 +7842,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_case2);
EXPECT_INS_REMOVED(write_case3);
@@ -8069,17 +7943,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_case2);
EXPECT_INS_REMOVED(write_case3);
@@ -8225,11 +8092,7 @@ TEST_F(LoadStoreEliminationTest, PartialIrreducibleLoop) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_TRUE(loop_header->IsLoopHeader());
EXPECT_TRUE(loop_header->GetLoopInformation()->IsIrreducible());
@@ -8382,11 +8245,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(cls);
EXPECT_INS_REMOVED(new_inst);
@@ -8544,11 +8403,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(cls);
EXPECT_INS_REMOVED(new_inst);
@@ -8752,11 +8607,7 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3)
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(cls);
EXPECT_INS_REMOVED(new_inst);
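The repeated dominance-clearing and logging boilerplate removed from the tests above is now handled by the fixture helpers (`PerformLSENoPartial()`, `PerformLSEWithPartial()`, `PerformSimplifications()`). A minimal sketch of the resulting call pattern inside a test body; the instruction names are placeholders, not taken from the patch:

  // After building the blocks and instructions for a test case:
  PerformLSEWithPartial(blks);   // or PerformLSENoPartial(blks); both log the graph
                                 // before/after LSE when kDebugLseTests is set.
  PerformSimplifications(blks);  // optional InstructionSimplifier follow-up, as in SimplifyTest*.
  EXPECT_INS_REMOVED(write_right);   // placeholder instruction names
  EXPECT_INS_RETAINED(read_bottom);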
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 5879c6fa07..f40b7f4f0c 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -21,7 +21,7 @@
#include "code_generator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Verify that Location is trivially copyable.
static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable");
@@ -57,7 +57,7 @@ LocationSummary::LocationSummary(HInstruction* instruction,
Location Location::RegisterOrConstant(HInstruction* instruction) {
return instruction->IsConstant()
- ? Location::ConstantLocation(instruction->AsConstant())
+ ? Location::ConstantLocation(instruction)
: Location::RequiresRegister();
}
@@ -85,16 +85,23 @@ Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
return instruction->IsConstant()
- ? Location::ConstantLocation(instruction->AsConstant())
+ ? Location::ConstantLocation(instruction)
: Location::RegisterLocation(reg);
}
Location Location::FpuRegisterOrConstant(HInstruction* instruction) {
return instruction->IsConstant()
- ? Location::ConstantLocation(instruction->AsConstant())
+ ? Location::ConstantLocation(instruction)
: Location::RequiresFpuRegister();
}
+void Location::DCheckInstructionIsConstant(HInstruction* instruction) {
+ DCHECK(instruction != nullptr);
+ DCHECK(instruction->IsConstant());
+ DCHECK_EQ(reinterpret_cast<uintptr_t>(instruction),
+ reinterpret_cast<uintptr_t>(instruction->AsConstant()));
+}
+
std::ostream& operator<<(std::ostream& os, const Location& location) {
os << location.DebugString();
if (location.IsRegister() || location.IsFpuRegister()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index acaea71a49..7ee076f442 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -22,9 +22,10 @@
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/bit_vector.h"
+#include "base/macros.h"
#include "base/value_object.h"
-namespace art {
+namespace art HIDDEN {
class HConstant;
class HInstruction;
@@ -102,8 +103,12 @@ class Location : public ValueObject {
return (value_ & kLocationConstantMask) == kConstant;
}
- static Location ConstantLocation(HConstant* constant) {
+ static Location ConstantLocation(HInstruction* constant) {
DCHECK(constant != nullptr);
+ if (kIsDebugBuild) {
+ // Call out-of-line helper to avoid circular dependency with `nodes.h`.
+ DCheckInstructionIsConstant(constant);
+ }
return Location(kConstant | reinterpret_cast<uintptr_t>(constant));
}
@@ -425,6 +430,8 @@ class Location : public ValueObject {
return PayloadField::Decode(value_);
}
+ static void DCheckInstructionIsConstant(HInstruction* instruction);
+
using KindField = BitField<Kind, 0, kBitsForKind>;
using PayloadField = BitField<uintptr_t, kBitsForKind, kBitsForPayload>;
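The `ConstantLocation()` change keeps the fast path inline in the header while the debug-only verification moves out of line, so locations.h does not need nodes.h. A minimal sketch of the same pattern with hypothetical names (`Holder`/`HeavyType` are illustrative, not from the patch), assuming ART's `kIsDebugBuild` and `DCHECK` helpers:

  // holder.h -- only a forward declaration of the heavyweight type.
  class HeavyType;
  class Holder {
   public:
    static Holder Wrap(HeavyType* t) {
      if (kIsDebugBuild) {
        DCheckIsWellFormed(t);  // out of line: defined where heavy_type.h can be included.
      }
      return Holder(t);
    }
   private:
    explicit Holder(HeavyType* t) : t_(t) {}
    static void DCheckIsWellFormed(HeavyType* t);
    HeavyType* t_;
  };

  // holder.cc -- may include heavy_type.h without creating a header cycle.
  // void Holder::DCheckIsWellFormed(HeavyType* t) { DCHECK(t != nullptr); }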
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index 76bd8493b2..95e81533da 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -20,7 +20,7 @@
#include "code_generator.h"
#include "induction_var_range.h"
-namespace art {
+namespace art HIDDEN {
void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
LoopAnalysisInfo* analysis_results,
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index fbf1516f64..cec00fecf4 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class InductionVarRange;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 2d7c20825c..7a52502562 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -27,7 +27,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
@@ -507,9 +507,8 @@ bool HLoopOptimization::Run() {
graph_->SetHasLoops(false); // no more loops
}
- // Detach.
+ // Detach allocator.
loop_allocator_ = nullptr;
- last_loop_ = top_loop_ = nullptr;
return did_loop_opt;
}
@@ -530,11 +529,7 @@ bool HLoopOptimization::LocalRun() {
AddLoop(block->GetLoopInformation());
}
}
-
- // TODO(solanes): How can `top_loop_` be null if `graph_->HasLoops()` is true?
- if (top_loop_ == nullptr) {
- return false;
- }
+ DCHECK(top_loop_ != nullptr);
// Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
// temporary data structures using the phase-local allocator. All new HIR
@@ -681,6 +676,50 @@ void HLoopOptimization::CalculateAndSetTryCatchKind(LoopNode* node) {
}
//
+// This optimization applies to loops with plain, simple operations
+// (i.e. no calls to Java code or the runtime) whose trip_count * instr_count
+// product is known to be small.
+//
+bool HLoopOptimization::TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ if (!graph_->SuspendChecksAreAllowedToNoOp()) {
+ return false;
+ }
+
+ int64_t trip_count = analysis_info->GetTripCount();
+
+ if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+ return false;
+ }
+
+ int64_t instruction_count = analysis_info->GetNumberOfInstructions();
+ int64_t total_instruction_count = trip_count * instruction_count;
+
+ // The HasInstructionsPreventingScalarOpts() check prevents this optimization
+ // from being applied to loops that contain calls.
+ bool can_optimize =
+ total_instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck &&
+ !analysis_info->HasInstructionsPreventingScalarOpts();
+
+ if (!can_optimize) {
+ return false;
+ }
+
+ // If we should do the optimization, disable codegen for the SuspendCheck.
+ if (generate_code) {
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ HBasicBlock* header = loop_info->GetHeader();
+ HSuspendCheck* instruction = header->GetLoopInformation()->GetSuspendCheck();
+ // As other optimizations depend on the SuspendCheck
+ // (e.g. CHAGuardVisitor::HoistGuard), disable its codegen instead of
+ // removing the SuspendCheck instruction.
+ instruction->SetIsNoOp(true);
+ }
+
+ return true;
+}
+
+//
// Optimization.
//
@@ -824,7 +863,7 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
- return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
+ return TryOptimizeInnerLoopFinite(node) || TryLoopScalarOpts(node);
}
//
@@ -928,7 +967,7 @@ bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool g
return true;
}
-bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
+bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) {
HLoopInformation* loop_info = node->loop_info;
int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
LoopAnalysisInfo analysis_info(loop_info);
@@ -941,10 +980,16 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) &&
!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
- !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
+ !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false) &&
+ !TryToRemoveSuspendCheckFromLoopHeader(&analysis_info, /*generate_code*/ false)) {
return false;
}
+ // Try the suspend check removal even for non-clonable loops. Also, this
+ // optimization doesn't interfere with the other scalar loop optimizations,
+ // so it can be done before them.
+ bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(&analysis_info);
+
// Run 'IsLoopClonable' the last as it might be time-consuming.
if (!LoopClonerHelper::IsLoopClonable(loop_info)) {
return false;
@@ -952,7 +997,7 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
return TryFullUnrolling(&analysis_info) ||
TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
- TryUnrollingForBranchPenaltyReduction(&analysis_info);
+ TryUnrollingForBranchPenaltyReduction(&analysis_info) || removed_suspend_check;
}
//
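Worked numbers for the gating in TryToRemoveSuspendCheckFromLoopHeader() above, using the kMaxTotalInstRemoveSuspendCheck threshold of 128 declared in loop_optimization.h below (the trip counts are illustrative):

  //   trip_count = 16, instruction_count = 7  ->  16 * 7 = 112 <= 128  -> SuspendCheck set to no-op
  //   trip_count = 40, instruction_count = 5  ->  40 * 5 = 200 >  128  -> SuspendCheck kept
  // In both cases the loop must also be free of calls (no instructions preventing scalar
  // opts) and the graph must be neither debuggable nor compiling OSR, i.e.
  // SuspendChecksAreAllowedToNoOp() must hold.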
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index b17861648f..6dd778ba74 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
@@ -25,7 +26,7 @@
#include "optimization.h"
#include "superblock_cloner.h"
-namespace art {
+namespace art HIDDEN {
class CompilerOptions;
class ArchNoOptsLoopHelper;
@@ -47,6 +48,11 @@ class HLoopOptimization : public HOptimization {
static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
+ // The maximum total number of instructions (trip_count * instruction_count)
+ // up to which the optimization of removing SuspendChecks from the loop header
+ // may be performed.
+ static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128;
+
private:
/**
* A single loop inside the loop hierarchy representation.
@@ -179,8 +185,19 @@ class HLoopOptimization : public HOptimization {
// should be actually applied.
bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true);
- // Tries to apply scalar loop peeling and unrolling.
- bool TryPeelingAndUnrolling(LoopNode* node);
+ // Tries to remove the SuspendCheck for plain loops with a low trip count. The
+ // SuspendCheck in the codegen makes sure that the thread can be interrupted
+ // during execution for GC. Not being able to do so might decrease the
+ // responsiveness of GC when a very long loop or a long recursion is being
+ // executed. However, for plain loops with a small trip count, removing the
+ // SuspendCheck should not affect the GC's responsiveness by a large margin.
+ // Consequently, since the thread won't be interrupted for such loops, removing
+ // the SuspendCheck is expected to improve performance.
+ bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to apply scalar loop optimizations.
+ bool TryLoopScalarOpts(LoopNode* node);
//
// Vectorization analysis and synthesis.
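A condensed sketch, outside its HLoopOptimization class context, of the dispatch order that TryLoopScalarOpts() now uses, mirroring the loop_optimization.cc hunks above (not a verbatim copy of the implementation):

  bool TryLoopScalarOptsSketch(LoopAnalysisInfo* info) {
    // Dry-run every candidate first; bail out if none would apply.
    if (!TryFullUnrolling(info, /*generate_code=*/ false) &&
        !TryPeelingForLoopInvariantExitsElimination(info, /*generate_code=*/ false) &&
        !TryUnrollingForBranchPenaltyReduction(info, /*generate_code=*/ false) &&
        !TryToRemoveSuspendCheckFromLoopHeader(info, /*generate_code=*/ false)) {
      return false;
    }
    // Suspend-check removal does not need the loop cloner, so it runs before the
    // potentially expensive clonability check.
    bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(info);
    if (!LoopClonerHelper::IsLoopClonable(info->GetLoopInfo())) {
      return false;
    }
    return TryFullUnrolling(info) ||
           TryPeelingForLoopInvariantExitsElimination(info) ||
           TryUnrollingForBranchPenaltyReduction(info) ||
           removed_suspend_check;
  }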
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index bda25283f5..7f694fb655 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -14,12 +14,13 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "loop_optimization.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the loop optimization tests. These unit tests focus
@@ -94,10 +95,7 @@ class LoopOptimizationTest : public OptimizingUnitTest {
void PerformAnalysis() {
graph_->BuildDominatorTree();
iva_->Run();
- // Do not release the loop hierarchy.
- ScopedArenaAllocator loop_allocator(GetArenaStack());
- loop_opt_->loop_allocator_ = &loop_allocator;
- loop_opt_->LocalRun();
+ loop_opt_->Run();
}
/** Constructs string representation of computed loop hierarchy. */
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d35ed1c543..3790058879 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -40,7 +40,7 @@
#include "scoped_thread_state_change-inl.h"
#include "ssa_builder.h"
-namespace art {
+namespace art HIDDEN {
// Enable floating-point static evaluation during constant folding
// only if all floating-point operations and constants evaluate in the
@@ -150,30 +150,54 @@ static void RemoveAsUser(HInstruction* instruction) {
RemoveEnvironmentUses(instruction);
}
-void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
+void HGraph::RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const {
for (size_t i = 0; i < blocks_.size(); ++i) {
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_[i];
if (block == nullptr) continue;
+
+ // Remove as user.
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
RemoveAsUser(it.Current());
}
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
RemoveAsUser(it.Current());
}
+
+ // Remove non-catch phi uses, and disconnect the block.
+ block->DisconnectFromSuccessors(&visited);
+ }
+ }
+}
+
+// This method assumes `insn` has been removed from all users with the exception of catch
+// phis because of missing exceptional edges in the graph. It removes the
+// instruction from catch phi uses, together with inputs of other catch phis in
+// the catch block at the same index, as these must be dead too.
+static void RemoveCatchPhiUsesOfDeadInstruction(HInstruction* insn) {
+ DCHECK(!insn->HasEnvironmentUses());
+ while (insn->HasNonEnvironmentUses()) {
+ const HUseListNode<HInstruction*>& use = insn->GetUses().front();
+ size_t use_index = use.GetIndex();
+ HBasicBlock* user_block = use.GetUser()->GetBlock();
+ DCHECK(use.GetUser()->IsPhi());
+ DCHECK(user_block->IsCatchBlock());
+ for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
}
}
}
void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
+ DCHECK(reverse_post_order_.empty()) << "We shouldn't have dominance information.";
for (size_t i = 0; i < blocks_.size(); ++i) {
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_[i];
if (block == nullptr) continue;
- // We only need to update the successor, which might be live.
- for (HBasicBlock* successor : block->GetSuccessors()) {
- successor->RemovePredecessor(block);
- }
+
+ // Remove all remaining uses (which should only be catch phi uses) and the instructions.
+ block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ true);
+
// Remove the block from the list of blocks, so that further analyses
// never see it.
blocks_[i] = nullptr;
@@ -200,7 +224,8 @@ GraphAnalysisResult HGraph::BuildDominatorTree() {
// (2) Remove instructions and phis from blocks not visited during
// the initial DFS as users from other instructions, so that
// users can be safely removed before uses later.
- RemoveInstructionsAsUsersFromDeadBlocks(visited);
+ // Also disconnect the block from its successors, updating the successor's phis if needed.
+ RemoveDeadBlocksInstructionsAsUsersAndDisconnect(visited);
// (3) Remove blocks not visited during the initial DFS.
// Step (5) requires dead blocks to be removed from the
@@ -237,6 +262,7 @@ void HGraph::ClearDominanceInformation() {
}
void HGraph::ClearLoopInformation() {
+ SetHasLoops(false);
SetHasIrreducibleLoops(false);
for (HBasicBlock* block : GetActiveBlocks()) {
block->SetLoopInformation(nullptr);
@@ -544,6 +570,15 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
}
}
+HBasicBlock* HGraph::SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor) {
+ HBasicBlock* new_block = SplitEdge(block, successor);
+ // In the RPO we have {... , block, ... , successor}. We want to insert `new_block` right after
+ // `block` to have a consistent RPO without recomputing the whole graph's RPO.
+ reverse_post_order_.insert(
+ reverse_post_order_.begin() + IndexOfElement(reverse_post_order_, block) + 1, new_block);
+ return new_block;
+}
+
// Reorder phi inputs to match reordering of the block's predecessors.
static void FixPhisAfterPredecessorsReodering(HBasicBlock* block, size_t first, size_t second) {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
@@ -653,7 +688,7 @@ void HGraph::TransformLoopToSinglePreheaderFormat(HBasicBlock* header) {
0,
header_phi->GetType());
if (header_phi->GetType() == DataType::Type::kReference) {
- preheader_phi->SetReferenceTypeInfo(header_phi->GetReferenceTypeInfo());
+ preheader_phi->SetReferenceTypeInfoIfValid(header_phi->GetReferenceTypeInfo());
}
preheader->AddPhi(preheader_phi);
@@ -708,6 +743,8 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
void HGraph::ComputeTryBlockInformation() {
// Iterate in reverse post order to propagate try membership information from
// predecessors to their successors.
+ bool graph_has_try_catch = false;
+
for (HBasicBlock* block : GetReversePostOrder()) {
if (block->IsEntryBlock() || block->IsCatchBlock()) {
// Catch blocks after simplification have only exceptional predecessors
@@ -722,6 +759,7 @@ void HGraph::ComputeTryBlockInformation() {
DCHECK_IMPLIES(block->IsLoopHeader(),
!block->GetLoopInformation()->IsBackEdge(*first_predecessor));
const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
+ graph_has_try_catch |= try_entry != nullptr;
if (try_entry != nullptr &&
(block->GetTryCatchInformation() == nullptr ||
try_entry != &block->GetTryCatchInformation()->GetTryEntry())) {
@@ -730,6 +768,8 @@ void HGraph::ComputeTryBlockInformation() {
block->SetTryCatchInformation(new (allocator_) TryCatchInformation(*try_entry));
}
}
+
+ SetHasTryCatch(graph_has_try_catch);
}
void HGraph::SimplifyCFG() {
@@ -1459,6 +1499,10 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1,
UNREACHABLE();
}
+bool HInstruction::Dominates(HInstruction* other_instruction) const {
+ return other_instruction == this || StrictlyDominates(other_instruction);
+}
+
bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const {
if (other_instruction == this) {
// An instruction does not strictly dominate itself.
@@ -1518,14 +1562,19 @@ void HInstruction::ReplaceWith(HInstruction* other) {
DCHECK(env_uses_.empty());
}
-void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator,
+ HInstruction* replacement,
+ bool strictly_dominated) {
const HUseList<HInstruction*>& uses = GetUses();
for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
HInstruction* user = it->GetUser();
size_t index = it->GetIndex();
// Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
++it;
- if (dominator->StrictlyDominates(user)) {
+ const bool dominated =
+ strictly_dominated ? dominator->StrictlyDominates(user) : dominator->Dominates(user);
+
+ if (dominated) {
user->ReplaceInput(replacement, index);
} else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) {
// If the input flows from a block dominated by `dominator`, we can replace it.
@@ -2108,8 +2157,9 @@ void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
MoveBefore(insert_pos);
}
-HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
- DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
+HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form) {
+ DCHECK_IMPLIES(require_graph_not_in_ssa_form, !graph_->IsInSsaForm())
+ << "Support for SSA form not implemented.";
DCHECK_EQ(cursor->GetBlock(), this);
HBasicBlock* new_block =
@@ -2376,24 +2426,6 @@ void HInstructionList::Add(const HInstructionList& instruction_list) {
}
}
-// Should be called on instructions in a dead block in post order. This method
-// assumes `insn` has been removed from all users with the exception of catch
-// phis because of missing exceptional edges in the graph. It removes the
-// instruction from catch phi uses, together with inputs of other catch phis in
-// the catch block at the same index, as these must be dead too.
-static void RemoveUsesOfDeadInstruction(HInstruction* insn) {
- DCHECK(!insn->HasEnvironmentUses());
- while (insn->HasNonEnvironmentUses()) {
- const HUseListNode<HInstruction*>& use = insn->GetUses().front();
- size_t use_index = use.GetIndex();
- HBasicBlock* user_block = use.GetUser()->GetBlock();
- DCHECK(use.GetUser()->IsPhi() && user_block->IsCatchBlock());
- for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
- }
- }
-}
-
void HBasicBlock::DisconnectAndDelete() {
// Dominators must be removed after all the blocks they dominate. This way
// a loop header is removed last, a requirement for correct loop information
@@ -2418,52 +2450,14 @@ void HBasicBlock::DisconnectAndDelete() {
}
// (2) Disconnect the block from its successors and update their phis.
- for (HBasicBlock* successor : successors_) {
- // Delete this block from the list of predecessors.
- size_t this_index = successor->GetPredecessorIndexOf(this);
- successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
-
- // Check that `successor` has other predecessors, otherwise `this` is the
- // dominator of `successor` which violates the order DCHECKed at the top.
- DCHECK(!successor->predecessors_.empty());
-
- // Remove this block's entries in the successor's phis. Skip exceptional
- // successors because catch phi inputs do not correspond to predecessor
- // blocks but throwing instructions. The inputs of the catch phis will be
- // updated in step (3).
- if (!successor->IsCatchBlock()) {
- if (successor->predecessors_.size() == 1u) {
- // The successor has just one predecessor left. Replace phis with the only
- // remaining input.
- for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- HPhi* phi = phi_it.Current()->AsPhi();
- phi->ReplaceWith(phi->InputAt(1 - this_index));
- successor->RemovePhi(phi);
- }
- } else {
- for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
- }
- }
- }
- }
- successors_.clear();
+ DisconnectFromSuccessors();
// (3) Remove instructions and phis. Instructions should have no remaining uses
// except in catch phis. If an instruction is used by a catch phi at `index`,
// remove `index`-th input of all phis in the catch block since they are
// guaranteed dead. Note that we may miss dead inputs this way but the
// graph will always remain consistent.
- for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* insn = it.Current();
- RemoveUsesOfDeadInstruction(insn);
- RemoveInstruction(insn);
- }
- for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
- HPhi* insn = it.Current()->AsPhi();
- RemoveUsesOfDeadInstruction(insn);
- RemovePhi(insn);
- }
+ RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ false);
// (4) Disconnect the block from its predecessors and update their
// control-flow instructions.
@@ -2537,6 +2531,70 @@ void HBasicBlock::DisconnectAndDelete() {
SetGraph(nullptr);
}
+void HBasicBlock::DisconnectFromSuccessors(const ArenaBitVector* visited) {
+ for (HBasicBlock* successor : successors_) {
+ // Delete this block from the list of predecessors.
+ size_t this_index = successor->GetPredecessorIndexOf(this);
+ successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
+
+ if (visited != nullptr && !visited->IsBitSet(successor->GetBlockId())) {
+ // `successor` itself is dead. Therefore, there is no need to update its phis.
+ continue;
+ }
+
+ DCHECK(!successor->predecessors_.empty());
+
+ // Remove this block's entries in the successor's phis. Skip exceptional
+ // successors because catch phi inputs do not correspond to predecessor
+ // blocks but to throwing instructions; those are removed in `RemoveCatchPhiUses`.
+ if (!successor->IsCatchBlock()) {
+ if (successor->predecessors_.size() == 1u) {
+ // The successor has just one predecessor left. Replace phis with the only
+ // remaining input.
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ HPhi* phi = phi_it.Current()->AsPhi();
+ phi->ReplaceWith(phi->InputAt(1 - this_index));
+ successor->RemovePhi(phi);
+ }
+ } else {
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+ }
+ }
+ }
+ }
+ successors_.clear();
+}
+
+void HBasicBlock::RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree) {
+ for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* insn = it.Current();
+ RemoveCatchPhiUsesOfDeadInstruction(insn);
+
+ // If we are building the dominator tree, we removed all input records previously.
+ // `RemoveInstruction` would try to remove them again, which is not supported and
+ // would crash, so we check here since `RemoveInstruction` won't be checking it.
+ if (building_dominator_tree) {
+ DCHECK(insn->GetUses().empty());
+ DCHECK(insn->GetEnvUses().empty());
+ }
+ RemoveInstruction(insn, /* ensure_safety= */ !building_dominator_tree);
+ }
+ for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* insn = it.Current()->AsPhi();
+ RemoveCatchPhiUsesOfDeadInstruction(insn);
+
+ // If we are building the dominator tree, we removed all input records previously.
+ // `RemovePhi` would try to remove them again, which is not supported and
+ // would crash, so we check here since `RemovePhi` won't be checking it.
+ if (building_dominator_tree) {
+ DCHECK(insn->GetUses().empty());
+ DCHECK(insn->GetEnvUses().empty());
+ }
+ RemovePhi(insn, /* ensure_safety= */ !building_dominator_tree);
+ }
+}
+
void HBasicBlock::MergeInstructionsWith(HBasicBlock* other) {
DCHECK(EndsWithControlFlowInstruction());
RemoveInstruction(GetLastInstruction());
@@ -2660,7 +2718,8 @@ void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
HBasicBlock* reference,
- bool replace_if_back_edge) {
+ bool replace_if_back_edge,
+ bool has_more_specific_try_catch_info) {
if (block->IsLoopHeader()) {
// Clear the information of which blocks are contained in that loop. Since the
// information is stored as a bit vector based on block ids, we have to update
@@ -2687,11 +2746,16 @@ void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
}
}
- // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block.
- TryCatchInformation* try_catch_info = reference->IsTryBlock()
- ? reference->GetTryCatchInformation()
- : nullptr;
- block->SetTryCatchInformation(try_catch_info);
+ DCHECK_IMPLIES(has_more_specific_try_catch_info, !reference->IsTryBlock())
+ << "We don't allow inlining try catches inside of other try blocks.";
+
+ // Update the TryCatchInformation, if we are not inlining a try catch.
+ if (!has_more_specific_try_catch_info) {
+ // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block.
+ TryCatchInformation* try_catch_info =
+ reference->IsTryBlock() ? reference->GetTryCatchInformation() : nullptr;
+ block->SetTryCatchInformation(try_catch_info);
+ }
}
HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
@@ -2730,9 +2794,15 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasMonitorOperations()) {
+ outer_graph->SetHasMonitorOperations(true);
+ }
if (HasSIMD()) {
outer_graph->SetHasSIMD(true);
}
+ if (HasAlwaysThrowingInvokes()) {
+ outer_graph->SetHasAlwaysThrowingInvokes(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
@@ -2771,6 +2841,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
HBasicBlock* first = entry_block_->GetSuccessors()[0];
DCHECK(!first->IsInLoop());
+ DCHECK(first->GetTryCatchInformation() == nullptr);
at->MergeWithInlined(first);
exit_block_->ReplaceWith(to);
@@ -2801,12 +2872,14 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// and (4) to the blocks that apply.
for (HBasicBlock* current : GetReversePostOrder()) {
if (current != exit_block_ && current != entry_block_ && current != first) {
- DCHECK(current->GetTryCatchInformation() == nullptr);
DCHECK(current->GetGraph() == this);
current->SetGraph(outer_graph);
outer_graph->AddBlock(current);
outer_graph->reverse_post_order_[++index_of_at] = current;
- UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false);
+ UpdateLoopAndTryInformationOfNewBlock(current,
+ at,
+ /* replace_if_back_edge= */ false,
+ current->GetTryCatchInformation() != nullptr);
}
}
@@ -2820,25 +2893,62 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Update all predecessors of the exit block (now the `to` block)
// to not `HReturn` but `HGoto` instead. Special case throwing blocks
- // to now get the outer graph exit block as successor. Note that the inliner
- // currently doesn't support inlining methods with try/catch.
+ // to now get the outer graph exit block as successor.
HPhi* return_value_phi = nullptr;
bool rerun_dominance = false;
bool rerun_loop_analysis = false;
for (size_t pred = 0; pred < to->GetPredecessors().size(); ++pred) {
HBasicBlock* predecessor = to->GetPredecessors()[pred];
HInstruction* last = predecessor->GetLastInstruction();
+
+ // At this point we might either have:
+ // A) Return/ReturnVoid/Throw as the last instruction, or
+ // B) `Return/ReturnVoid/Throw->TryBoundary` as the last instruction chain
+
+ const bool saw_try_boundary = last->IsTryBoundary();
+ if (saw_try_boundary) {
+ DCHECK(predecessor->IsSingleTryBoundary());
+ DCHECK(!last->AsTryBoundary()->IsEntry());
+ predecessor = predecessor->GetSinglePredecessor();
+ last = predecessor->GetLastInstruction();
+ }
+
if (last->IsThrow()) {
- DCHECK(!at->IsTryBlock());
- predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock());
+ if (at->IsTryBlock()) {
+ DCHECK(!saw_try_boundary) << "We don't support inlining of try blocks into try blocks.";
+ // Create a TryBoundary of kind:exit and point it to the Exit block.
+ HBasicBlock* new_block = outer_graph->SplitEdge(predecessor, to);
+ new_block->AddInstruction(
+ new (allocator) HTryBoundary(HTryBoundary::BoundaryKind::kExit, last->GetDexPc()));
+ new_block->ReplaceSuccessor(to, outer_graph->GetExitBlock());
+
+ // Copy information from the predecessor.
+ new_block->SetLoopInformation(predecessor->GetLoopInformation());
+ TryCatchInformation* try_catch_info = predecessor->GetTryCatchInformation();
+ new_block->SetTryCatchInformation(try_catch_info);
+ for (HBasicBlock* xhandler :
+ try_catch_info->GetTryEntry().GetBlock()->GetExceptionalSuccessors()) {
+ new_block->AddSuccessor(xhandler);
+ }
+ DCHECK(try_catch_info->GetTryEntry().HasSameExceptionHandlersAs(
+ *new_block->GetLastInstruction()->AsTryBoundary()));
+ } else {
+ // We either have `Throw->TryBoundary` or `Throw`. We want to point the whole chain
+ // to the exit, so we recompute `predecessor`.
+ predecessor = to->GetPredecessors()[pred];
+ predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock());
+ }
+
--pred;
// We need to re-run dominance information, as the exit block now has
- // a new dominator.
+ // a new predecessor and potential new dominator.
+ // TODO(solanes): See if it's worth it to hand-modify the domination chain instead of
+ // rerunning the dominance for the whole graph.
rerun_dominance = true;
if (predecessor->GetLoopInformation() != nullptr) {
- // The exit block and blocks post dominated by the exit block do not belong
- // to any loop. Because we do not compute the post dominators, we need to re-run
- // loop analysis to get the loop information correct.
+ // The loop information might have changed, e.g. `predecessor` might not be in a loop
+ // anymore. We only do this if `predecessor` has loop information, as it is impossible
+ // for `predecessor` to end up in a loop if it wasn't in one before.
rerun_loop_analysis = true;
}
} else {
@@ -2863,6 +2973,19 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
predecessor->AddInstruction(new (allocator) HGoto(last->GetDexPc()));
predecessor->RemoveInstruction(last);
+
+ if (saw_try_boundary) {
+ predecessor = to->GetPredecessors()[pred];
+ DCHECK(predecessor->EndsWithTryBoundary());
+ DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+ if (predecessor->GetSuccessors()[0]->GetPredecessors().size() > 1) {
+ outer_graph->SplitCriticalEdge(predecessor, to);
+ rerun_dominance = true;
+ if (predecessor->GetLoopInformation() != nullptr) {
+ rerun_loop_analysis = true;
+ }
+ }
+ }
}
}
if (rerun_loop_analysis) {
@@ -3047,6 +3170,7 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc());
new_header->AddInstruction(suspend_check);
new_body->AddInstruction(new (allocator_) HGoto());
+ DCHECK(loop->GetSuspendCheck() != nullptr);
suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment(
loop->GetSuspendCheck()->GetEnvironment(), header);
@@ -3091,6 +3215,12 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact());
}
+void HInstruction::SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti) {
+ if (rti.IsValid()) {
+ SetReferenceTypeInfo(rti);
+ }
+}
+
bool HBoundType::InstructionDataEquals(const HInstruction* other) const {
const HBoundType* other_bt = other->AsBoundType();
ScopedObjectAccess soa(Thread::Current());
@@ -3441,8 +3571,8 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) {
return kCanThrow;
}
-void HInvoke::SetResolvedMethod(ArtMethod* method) {
- if (method != nullptr && method->IsIntrinsic()) {
+void HInvoke::SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt) {
+ if (method != nullptr && method->IsIntrinsic() && enable_intrinsic_opt) {
Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic());
SetIntrinsic(intrinsic,
NeedsEnvironmentIntrinsic(intrinsic),
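A small illustration of the non-strict dominance query added above and of the new `strictly_dominated` flag on ReplaceUsesDominatedBy() (a sketch of the semantics, not code from the patch):

  //   insn->StrictlyDominates(insn) -> false  (an instruction never strictly dominates itself)
  //   insn->Dominates(insn)         -> true   (same instruction, or strictly dominated)
  // Hence, calling value->ReplaceUsesDominatedBy(dominator, replacement,
  // /*strictly_dominated=*/ false) also rewrites a use whose user is `dominator` itself,
  // which the default strict form leaves untouched.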
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7a0059f616..28112d176a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -29,6 +29,7 @@
#include "base/array_ref.h"
#include "base/intrusive_forward_list.h"
#include "base/iteration_range.h"
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/quasi_atomic.h"
#include "base/stl_util.h"
@@ -51,7 +52,7 @@
#include "mirror/method_type.h"
#include "offsets.h"
-namespace art {
+namespace art HIDDEN {
class ArenaStack;
class CodeGenerator;
@@ -406,6 +407,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
has_loops_(false),
has_irreducible_loops_(false),
has_direct_critical_native_call_(false),
+ has_always_throwing_invokes_(false),
dead_reference_safe_(dead_reference_safe),
debuggable_(debuggable),
current_instruction_id_(start_instruction_id),
@@ -485,9 +487,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// Update the loop and try membership of `block`, which was spawned from `reference`.
// In case `reference` is a back edge, `replace_if_back_edge` notifies whether `block`
// should be the new back edge.
+ // `has_more_specific_try_catch_info` will be set to true when inlining a try catch.
void UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
HBasicBlock* reference,
- bool replace_if_back_edge);
+ bool replace_if_back_edge,
+ bool has_more_specific_try_catch_info = false);
// Need to add a couple of blocks to test if the loop body is entered and
// put deoptimization instructions, etc.
@@ -510,6 +514,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor);
void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
+
+ // Splits the edge between `block` and `successor` and then updates the graph's RPO to keep
+ // consistency without recomputing the whole graph.
+ HBasicBlock* SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor);
+
void OrderLoopHeaderPredecessors(HBasicBlock* header);
// Transform a loop into a format with a single preheader.
@@ -678,6 +687,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return cha_single_implementation_list_;
}
+ // In case of OSR we intend to use SuspendChecks as an entry point to the
+ // function; for debuggable graphs we might deoptimize to interpreter from
+ // SuspendChecks. In these cases we should always generate code for them.
+ bool SuspendChecksAreAllowedToNoOp() const {
+ return !IsDebuggable() && !IsCompilingOsr();
+ }
+
void AddCHASingleImplementationDependency(ArtMethod* method) {
cha_single_implementation_list_.insert(method);
}
@@ -704,6 +720,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasDirectCriticalNativeCall() const { return has_direct_critical_native_call_; }
void SetHasDirectCriticalNativeCall(bool value) { has_direct_critical_native_call_ = value; }
+ bool HasAlwaysThrowingInvokes() const { return has_always_throwing_invokes_; }
+ void SetHasAlwaysThrowingInvokes(bool value) { has_always_throwing_invokes_ = value; }
+
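A minimal sketch of the intended protocol for the new flag, assuming a pass that has just proven an invoke can never return normally (the surrounding pass code is an assumption; only the two setters come from this change):

  // Record the fact on the instruction and on the graph so later passes can query it cheaply.
  invoke->SetAlwaysThrows(/* always_throws= */ true);
  invoke->GetBlock()->GetGraph()->SetHasAlwaysThrowingInvokes(/* value= */ true);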
ArtMethod* GetArtMethod() const { return art_method_; }
void SetArtMethod(ArtMethod* method) { art_method_ = method; }
@@ -719,12 +738,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact= */ false);
}
- uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; }
+ uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; }
void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }
private:
- void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
+ void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited);
template <class InstructionType, typename ValueType>
@@ -792,14 +811,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
size_t temporaries_vreg_slots_;
// Flag whether there are bounds checks in the graph. We can skip
- // BCE if it's false. It's only best effort to keep it up to date in
- // the presence of code elimination so there might be false positives.
+ // BCE if it's false.
bool has_bounds_checks_;
// Flag whether there are try/catch blocks in the graph. We will skip
- // try/catch-related passes if it's false. It's only best effort to keep
- // it up to date in the presence of code elimination so there might be
- // false positives.
+ // try/catch-related passes if it's false.
bool has_try_catch_;
// Flag whether there are any HMonitorOperation in the graph. If yes this will mandate
@@ -812,20 +828,19 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool has_simd_;
// Flag whether there are any loops in the graph. We can skip loop
- // optimization if it's false. It's only best effort to keep it up
- // to date in the presence of code elimination so there might be false
- // positives.
+ // optimization if it's false.
bool has_loops_;
- // Flag whether there are any irreducible loops in the graph. It's only
- // best effort to keep it up to date in the presence of code elimination
- // so there might be false positives.
+ // Flag whether there are any irreducible loops in the graph.
bool has_irreducible_loops_;
// Flag whether there are any direct calls to native code registered
// for @CriticalNative methods.
bool has_direct_critical_native_call_;
+ // Flag whether the graph contains invokes that always throw.
+ bool has_always_throwing_invokes_;
+
// Is the code known to be robust against eliminating dead references
// and the effects of early finalization? If false, dead reference variables
// are kept if they might be visible to the garbage collector.
@@ -1291,7 +1306,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
// graph, create a Goto at the end of the former block and will create an edge
// between the blocks. It will not, however, update the reverse post order or
// loop and try/catch information.
- HBasicBlock* SplitBefore(HInstruction* cursor);
+ HBasicBlock* SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form = true);
// Split the block into two blocks just before `cursor`. Returns the newly
// created block. Note that this method just updates raw block information,
@@ -1332,6 +1347,20 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
// are safely updated.
void DisconnectAndDelete();
+ // Disconnects `this` from all of its successors and updates their phis, if they have any.
+ // If `visited` is provided, it is used to determine whether a successor is reachable, and
+ // updating the phis of unreachable successors is skipped.
+ void DisconnectFromSuccessors(const ArenaBitVector* visited = nullptr);
+
+ // Removes the catch phi uses of the instructions in `this`, and then removes the instructions
+ // themselves. If `building_dominator_tree` is true, it does not remove the instructions as
+ // users, since that was already done in a previous step. This is a special case for building up
+ // the dominator tree: we want to eliminate uses before inputs, but we do not have domination
+ // information yet, so we remove all connections from inputs/uses first, before removing any
+ // instruction.
+ // This method assumes the instructions have already been removed from all users, with the
+ // exception of catch phis, because of missing exceptional edges in the graph.
+ void RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree);
+
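A minimal sketch of the intended call order when tearing down unreachable blocks, assuming a caller that has already computed a `visited` bit vector (the driver loop is not part of this hunk):

  // Break the successor edges first so the surviving phis stop referring to the dead
  // predecessor, then drop the instructions; catch phi inputs are cleaned up by the
  // second call because exceptional edges are not represented in the graph.
  block->DisconnectFromSuccessors(&visited);
  block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree= */ true);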
void AddInstruction(HInstruction* instruction);
// Insert `instruction` before/after an existing instruction `cursor`.
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
@@ -1540,10 +1569,10 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(Min, BinaryOperation) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
- M(NativeDebugInfo, Instruction) \
M(Neg, UnaryOperation) \
M(NewArray, Instruction) \
M(NewInstance, Instruction) \
+ M(Nop, Instruction) \
M(Not, UnaryOperation) \
M(NotEqual, Condition) \
M(NullConstant, Instruction) \
@@ -2348,7 +2377,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return GetType() == DataType::Type::kReference;
}
+ // Sets the ReferenceTypeInfo. The RTI must be valid.
void SetReferenceTypeInfo(ReferenceTypeInfo rti);
+ // Same as above, but we only set it if it's valid. Otherwise, we don't change the current RTI.
+ void SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti);
ReferenceTypeInfo GetReferenceTypeInfo() const {
DCHECK_EQ(GetType(), DataType::Type::kReference);
@@ -2408,7 +2440,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
!CanThrow() &&
!IsSuspendCheck() &&
!IsControlFlow() &&
- !IsNativeDebugInfo() &&
+ !IsNop() &&
!IsParameterValue() &&
// If we added an explicit barrier then we should keep it.
!IsMemoryBarrier() &&
@@ -2419,9 +2451,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return IsRemovable() && !HasUses();
}
- // Does this instruction strictly dominate `other_instruction`?
- // Returns false if this instruction and `other_instruction` are the same.
- // Aborts if this instruction and `other_instruction` are both phis.
+ // Does this instruction dominate `other_instruction`?
+ // Aborts if this instruction and `other_instruction` are different phis.
+ bool Dominates(HInstruction* other_instruction) const;
+
+ // Same as above, but for strict dominance, i.e. it returns false if this instruction and
+ // `other_instruction` are the same.
bool StrictlyDominates(HInstruction* other_instruction) const;
int GetId() const { return id_; }
@@ -2486,7 +2521,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void SetLocations(LocationSummary* locations) { locations_ = locations; }
void ReplaceWith(HInstruction* instruction);
- void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
+ void ReplaceUsesDominatedBy(HInstruction* dominator,
+ HInstruction* replacement,
+ bool strictly_dominated = true);
void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
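A short sketch of the effect of the new default parameter; both calls are illustrative rather than taken from a real call site:

  // Default: identical to the old behaviour, only uses strictly dominated by `dominator`.
  instr->ReplaceUsesDominatedBy(dominator, replacement);
  // Non-strict variant: presumably also covers a use located at `dominator` itself.
  instr->ReplaceUsesDominatedBy(dominator, replacement, /* strictly_dominated= */ false);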
@@ -3730,7 +3767,7 @@ class HClassTableGet final : public HExpression<1> {
static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize;
static_assert(kNumberOfClassTableGetPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
- using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKind>;
+ using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKindSize>;
// The index of the ArtMethod in the table.
const size_t index_;
@@ -4700,7 +4737,7 @@ class HInvoke : public HVariableInputSizeInstruction {
void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); }
- bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); }
+ bool AlwaysThrows() const override final { return GetPackedFlag<kFlagAlwaysThrows>(); }
bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); }
@@ -4719,7 +4756,7 @@ class HInvoke : public HVariableInputSizeInstruction {
bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
ArtMethod* GetResolvedMethod() const { return resolved_method_; }
- void SetResolvedMethod(ArtMethod* method);
+ void SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt);
MethodReference GetMethodReference() const { return method_reference_; }
@@ -4748,7 +4785,8 @@ class HInvoke : public HVariableInputSizeInstruction {
MethodReference method_reference,
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- InvokeType invoke_type)
+ InvokeType invoke_type,
+ bool enable_intrinsic_opt)
: HVariableInputSizeInstruction(
kind,
return_type,
@@ -4764,7 +4802,7 @@ class HInvoke : public HVariableInputSizeInstruction {
intrinsic_optimizations_(0) {
SetPackedField<InvokeTypeField>(invoke_type);
SetPackedFlag<kFlagCanThrow>(true);
- SetResolvedMethod(resolved_method);
+ SetResolvedMethod(resolved_method, enable_intrinsic_opt);
}
DEFAULT_COPY_CONSTRUCTOR(Invoke);
@@ -4797,7 +4835,8 @@ class HInvokeUnresolved final : public HInvoke {
method_reference,
nullptr,
MethodReference(nullptr, 0u),
- invoke_type) {
+ invoke_type,
+ /* enable_intrinsic_opt= */ false) {
}
bool IsClonable() const override { return true; }
@@ -4820,7 +4859,8 @@ class HInvokePolymorphic final : public HInvoke {
// to pass intrinsic information to the HInvokePolymorphic node.
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- dex::ProtoIndex proto_idx)
+ dex::ProtoIndex proto_idx,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokePolymorphic,
allocator,
number_of_arguments,
@@ -4830,7 +4870,8 @@ class HInvokePolymorphic final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- kPolymorphic),
+ kPolymorphic,
+ enable_intrinsic_opt),
proto_idx_(proto_idx) {
}
@@ -4852,7 +4893,8 @@ class HInvokeCustom final : public HInvoke {
uint32_t call_site_index,
DataType::Type return_type,
uint32_t dex_pc,
- MethodReference method_reference)
+ MethodReference method_reference,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeCustom,
allocator,
number_of_arguments,
@@ -4862,7 +4904,8 @@ class HInvokeCustom final : public HInvoke {
method_reference,
/* resolved_method= */ nullptr,
MethodReference(nullptr, 0u),
- kStatic),
+ kStatic,
+ enable_intrinsic_opt),
call_site_index_(call_site_index) {
}
@@ -4909,7 +4952,8 @@ class HInvokeStaticOrDirect final : public HInvoke {
DispatchInfo dispatch_info,
InvokeType invoke_type,
MethodReference resolved_method_reference,
- ClinitCheckRequirement clinit_check_requirement)
+ ClinitCheckRequirement clinit_check_requirement,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeStaticOrDirect,
allocator,
number_of_arguments,
@@ -4922,7 +4966,8 @@ class HInvokeStaticOrDirect final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- invoke_type),
+ invoke_type,
+ enable_intrinsic_opt),
dispatch_info_(dispatch_info) {
SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement);
}
@@ -5134,7 +5179,8 @@ class HInvokeVirtual final : public HInvoke {
MethodReference method_reference,
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- uint32_t vtable_index)
+ uint32_t vtable_index,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeVirtual,
allocator,
number_of_arguments,
@@ -5144,7 +5190,8 @@ class HInvokeVirtual final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- kVirtual),
+ kVirtual,
+ enable_intrinsic_opt),
vtable_index_(vtable_index) {
}
@@ -5196,7 +5243,8 @@ class HInvokeInterface final : public HInvoke {
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
uint32_t imt_index,
- MethodLoadKind load_kind)
+ MethodLoadKind load_kind,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeInterface,
allocator,
number_of_arguments + (NeedsCurrentMethod(load_kind) ? 1 : 0),
@@ -5206,7 +5254,8 @@ class HInvokeInterface final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- kInterface),
+ kInterface,
+ enable_intrinsic_opt),
imt_index_(imt_index),
hidden_argument_load_kind_(load_kind) {
}
@@ -5321,7 +5370,7 @@ class HNewArray final : public HExpression<2> {
kFieldComponentSizeShift + kFieldComponentSizeShiftSize;
static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using ComponentSizeShiftField =
- BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShift>;
+ BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShiftSize>;
};
class HAdd final : public HBinaryOperation {
@@ -6362,6 +6411,27 @@ class HPredicatedInstanceFieldGet final : public HExpression<2> {
const FieldInfo field_info_;
};
+enum class WriteBarrierKind {
+ // Emit the write barrier, with a runtime optimization which checks whether the value being set
+ // is null.
+ kEmitWithNullCheck,
+ // Emit the write barrier, without the runtime null check optimization. This could be set because:
+ // A) It is a write barrier for an ArraySet (which does the optimization with the type check, so
+ // it never does the optimization at the write barrier stage)
+ // B) We know that the input can't be null
+ // C) This write barrier is actually several write barriers coalesced into one. We could
+ // potentially check whether every value is null for a runtime optimization, at the cost of
+ // compile time / code size. At the time of writing this was deemed not worth the effort.
+ kEmitNoNullCheck,
+ // Skip emitting the write barrier. This could be set because:
+ // A) The write barrier is not needed (e.g. it is not a reference, or the value is the null
+ // constant)
+ // B) This write barrier was coalesced into another one so there's no need to emit it.
+ kDontEmit,
+ kLast = kDontEmit
+};
+std::ostream& operator<<(std::ostream& os, WriteBarrierKind rhs);
+
class HInstanceFieldSet final : public HExpression<2> {
public:
HInstanceFieldSet(HInstruction* object,
@@ -6386,6 +6456,7 @@ class HInstanceFieldSet final : public HExpression<2> {
dex_file) {
SetPackedFlag<kFlagValueCanBeNull>(true);
SetPackedFlag<kFlagIsPredicatedSet>(false);
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck);
SetRawInputAt(0, object);
SetRawInputAt(1, value);
}
@@ -6406,6 +6477,12 @@ class HInstanceFieldSet final : public HExpression<2> {
void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); }
void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); }
+ WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
+ void SetWriteBarrierKind(WriteBarrierKind kind) {
+ DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
+ << "We shouldn't go back to the original value.";
+ SetPackedField<WriteBarrierKindField>(kind);
+ }
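A hedged sketch of how a pass such as the new write barrier elimination might use these accessors; the helper name and the exact conditions are assumptions, only the enum values, the input layout, and the setter contract (never back to kEmitWithNullCheck) come from this change:

  // Hypothetical helper: relax the barrier for a field store whose value is known.
  void MaybeRelaxWriteBarrier(HInstanceFieldSet* set) {
    HInstruction* value = set->InputAt(1);  // Input 1 is the stored value.
    if (value->IsNullConstant()) {
      set->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);         // No barrier for null stores.
    } else if (!value->CanBeNull()) {
      set->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck);  // Skip the runtime null check.
    }
  }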
DECLARE_INSTRUCTION(InstanceFieldSet);
@@ -6415,11 +6492,17 @@ class HInstanceFieldSet final : public HExpression<2> {
private:
static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1;
- static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagIsPredicatedSet + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1;
+ static constexpr size_t kWriteBarrierKindSize =
+ MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
+ static constexpr size_t kNumberOfInstanceFieldSetPackedBits =
+ kWriteBarrierKind + kWriteBarrierKindSize;
static_assert(kNumberOfInstanceFieldSetPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
const FieldInfo field_info_;
+ using WriteBarrierKindField =
+ BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>;
};
class HArrayGet final : public HExpression<2> {
@@ -6540,6 +6623,8 @@ class HArraySet final : public HExpression<3> {
SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference);
SetPackedFlag<kFlagValueCanBeNull>(true);
SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false);
+ // ArraySets never do the null check optimization at the write barrier stage.
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitNoNullCheck);
SetRawInputAt(0, array);
SetRawInputAt(1, index);
SetRawInputAt(2, value);
@@ -6560,8 +6645,10 @@ class HArraySet final : public HExpression<3> {
return false;
}
- void ClearNeedsTypeCheck() {
+ void ClearTypeCheck() {
SetPackedFlag<kFlagNeedsTypeCheck>(false);
+ // Clear the `CanTriggerGC` flag too as we can only trigger a GC when doing a type check.
+ SetSideEffects(GetSideEffects().Exclusion(SideEffects::CanTriggerGC()));
}
void ClearValueCanBeNull() {
@@ -6610,6 +6697,16 @@ class HArraySet final : public HExpression<3> {
: SideEffects::None();
}
+ WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
+
+ void SetWriteBarrierKind(WriteBarrierKind kind) {
+ DCHECK(kind != WriteBarrierKind::kEmitNoNullCheck)
+ << "We shouldn't go back to the original value.";
+ DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
+ << "We never do the null check optimization for ArraySets.";
+ SetPackedField<WriteBarrierKindField>(kind);
+ }
+
DECLARE_INSTRUCTION(ArraySet);
protected:
@@ -6625,11 +6722,16 @@ class HArraySet final : public HExpression<3> {
// Cached information for the reference_type_info_ so that codegen
// does not need to inspect the static type.
static constexpr size_t kFlagStaticTypeOfArrayIsObjectArray = kFlagValueCanBeNull + 1;
- static constexpr size_t kNumberOfArraySetPackedBits =
- kFlagStaticTypeOfArrayIsObjectArray + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagStaticTypeOfArrayIsObjectArray + 1;
+ static constexpr size_t kWriteBarrierKindSize =
+ MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
+ static constexpr size_t kNumberOfArraySetPackedBits = kWriteBarrierKind + kWriteBarrierKindSize;
static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using ExpectedComponentTypeField =
BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>;
+
+ using WriteBarrierKindField =
+ BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>;
};
class HArrayLength final : public HExpression<1> {
@@ -6714,9 +6816,10 @@ class HBoundsCheck final : public HExpression<2> {
class HSuspendCheck final : public HExpression<0> {
public:
- explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
+ explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc, bool is_no_op = false)
: HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc),
slow_path_(nullptr) {
+ SetPackedFlag<kFlagIsNoOp>(is_no_op);
}
bool IsClonable() const override { return true; }
@@ -6725,6 +6828,10 @@ class HSuspendCheck final : public HExpression<0> {
return true;
}
+ void SetIsNoOp(bool is_no_op) { SetPackedFlag<kFlagIsNoOp>(is_no_op); }
+ bool IsNoOp() const { return GetPackedFlag<kFlagIsNoOp>(); }
+
void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; }
SlowPathCode* GetSlowPath() const { return slow_path_; }
@@ -6733,28 +6840,42 @@ class HSuspendCheck final : public HExpression<0> {
protected:
DEFAULT_COPY_CONSTRUCTOR(SuspendCheck);
+ // True if the HSuspendCheck should not emit any code during codegen. It is
+ // not possible to simply remove this instruction to disable codegen, as
+ // other optimizations (e.g. CHAGuardVisitor::HoistGuard) depend on
+ // HSuspendCheck being present in every loop.
+ static constexpr size_t kFlagIsNoOp = kNumberOfGenericPackedBits;
+ static constexpr size_t kNumberOfSuspendCheckPackedBits = kFlagIsNoOp + 1;
+ static_assert(kNumberOfSuspendCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+
private:
// Only used for code generation, in order to share the same slow path between back edges
// of the same loop.
SlowPathCode* slow_path_;
};
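A minimal sketch of how the two new queries are meant to combine at code-generation time; the snippet is illustrative and not taken from the real code generators:

  // Emit nothing when the graph is neither debuggable nor compiling OSR and the check was
  // marked as a no-op; otherwise fall through to the normal slow-path emission.
  if (graph->SuspendChecksAreAllowedToNoOp() && suspend_check->IsNoOp()) {
    return;
  }
  // ... emit the usual test-and-branch to the suspend check slow path ...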
-// Pseudo-instruction which provides the native debugger with mapping information.
-// It ensures that we can generate line number and local variables at this point.
-class HNativeDebugInfo : public HExpression<0> {
+// Pseudo-instruction which doesn't generate any code.
+// If `needs_environment` is true, it can be used to generate an environment. It is used, for
+// example, to provide the native debugger with mapping information, ensuring that we can generate
+// line number and local variable information at this point.
+class HNop : public HExpression<0> {
public:
- explicit HNativeDebugInfo(uint32_t dex_pc)
- : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) {
+ explicit HNop(uint32_t dex_pc, bool needs_environment)
+ : HExpression<0>(kNop, SideEffects::None(), dex_pc), needs_environment_(needs_environment) {
}
bool NeedsEnvironment() const override {
- return true;
+ return needs_environment_;
}
- DECLARE_INSTRUCTION(NativeDebugInfo);
+ DECLARE_INSTRUCTION(Nop);
protected:
- DEFAULT_COPY_CONSTRUCTOR(NativeDebugInfo);
+ DEFAULT_COPY_CONSTRUCTOR(Nop);
+
+ private:
+ bool needs_environment_;
};
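For reference, a sketch of how the two flavours of the renamed instruction are constructed; the allocation sites are assumptions, only the constructor signature comes from this change:

  // Former HNativeDebugInfo use: keep an environment so the native debugger can map this point.
  HNop* debug_nop = new (allocator) HNop(dex_pc, /* needs_environment= */ true);
  // Plain placeholder that generates no code and records no environment.
  HNop* plain_nop = new (allocator) HNop(dex_pc, /* needs_environment= */ false);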
/**
@@ -7222,6 +7343,10 @@ class HLoadMethodHandle final : public HInstruction {
return SideEffects::CanTriggerGC();
}
+ bool CanThrow() const override { return true; }
+
+ bool NeedsEnvironment() const override { return true; }
+
DECLARE_INSTRUCTION(LoadMethodHandle);
protected:
@@ -7266,6 +7391,10 @@ class HLoadMethodType final : public HInstruction {
return SideEffects::CanTriggerGC();
}
+ bool CanThrow() const override { return true; }
+
+ bool NeedsEnvironment() const override { return true; }
+
DECLARE_INSTRUCTION(LoadMethodType);
protected:
@@ -7400,6 +7529,7 @@ class HStaticFieldSet final : public HExpression<2> {
declaring_class_def_index,
dex_file) {
SetPackedFlag<kFlagValueCanBeNull>(true);
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck);
SetRawInputAt(0, cls);
SetRawInputAt(1, value);
}
@@ -7415,6 +7545,13 @@ class HStaticFieldSet final : public HExpression<2> {
bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); }
void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
+ WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
+ void SetWriteBarrierKind(WriteBarrierKind kind) {
+ DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
+ << "We shouldn't go back to the original value.";
+ SetPackedField<WriteBarrierKindField>(kind);
+ }
+
DECLARE_INSTRUCTION(StaticFieldSet);
protected:
@@ -7422,25 +7559,34 @@ class HStaticFieldSet final : public HExpression<2> {
private:
static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
- static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1;
+ static constexpr size_t kWriteBarrierKindSize =
+ MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
+ static constexpr size_t kNumberOfStaticFieldSetPackedBits =
+ kWriteBarrierKind + kWriteBarrierKindSize;
static_assert(kNumberOfStaticFieldSetPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
const FieldInfo field_info_;
+ using WriteBarrierKindField =
+ BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>;
};
class HStringBuilderAppend final : public HVariableInputSizeInstruction {
public:
HStringBuilderAppend(HIntConstant* format,
uint32_t number_of_arguments,
+ bool has_fp_args,
ArenaAllocator* allocator,
uint32_t dex_pc)
: HVariableInputSizeInstruction(
kStringBuilderAppend,
DataType::Type::kReference,
- // The runtime call may read memory from inputs. It never writes outside
- // of the newly allocated result object (or newly allocated helper objects).
- SideEffects::AllReads().Union(SideEffects::CanTriggerGC()),
+ SideEffects::CanTriggerGC().Union(
+ // The runtime call may read memory from inputs. It never writes outside
+ // of the newly allocated result object or newly allocated helper objects,
+ // except for float/double arguments where we reuse thread-local helper objects.
+ has_fp_args ? SideEffects::AllWritesAndReads() : SideEffects::AllReads()),
dex_pc,
allocator,
number_of_arguments + /* format */ 1u,
@@ -8393,7 +8539,7 @@ class HIntermediateAddress final : public HExpression<2> {
#include "nodes_x86.h"
#endif
-namespace art {
+namespace art HIDDEN {
class OptimizingCompilerStats;
@@ -8457,7 +8603,7 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr);
// Create a clone for each clonable instructions/phis and replace the original with the clone.
//
// Used for testing individual instruction cloner.
-class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
+class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor {
public:
explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
: HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
diff --git a/compiler/optimizing/nodes_shared.cc b/compiler/optimizing/nodes_shared.cc
index eca97d7a70..b3a7ad9a05 100644
--- a/compiler/optimizing/nodes_shared.cc
+++ b/compiler/optimizing/nodes_shared.cc
@@ -23,7 +23,7 @@
#include "instruction_simplifier_shared.h"
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 7dcac1787e..27e610328f 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -22,7 +22,7 @@
// (defining `HInstruction` and co).
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class HMultiplyAccumulate final : public HExpression<3> {
public:
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 34f0e9b1e1..29210fe10f 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -17,11 +17,12 @@
#include "nodes.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "optimizing_unit_test.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
class NodeTest : public OptimizingUnitTest {};
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index a2cd86dc33..73f6c40a0d 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -21,7 +21,7 @@
// is included in the header file nodes.h itself. However it gives editing tools better context.
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base,
// and base is a power of two. For example, the value Alignment(16, 0) means memory is
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index b0a665d704..e0a48db84f 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -15,10 +15,11 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for testing vector nodes.
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 8e8fbc1581..e246390aa5 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_NODES_X86_H_
#define ART_COMPILER_OPTIMIZING_NODES_X86_H_
-namespace art {
+namespace art HIDDEN {
// Compute the address of the method for X86 Constant area support.
class HX86ComputeBaseMethodAddress final : public HExpression<0> {
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 2cac38b715..12e9a1046d 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -55,10 +55,11 @@
#include "select_generator.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
+#include "write_barrier_elimination.h"
// Decide between default or alternative pass name.
-namespace art {
+namespace art HIDDEN {
const char* OptimizationPassName(OptimizationPass pass) {
switch (pass) {
@@ -76,6 +77,7 @@ const char* OptimizationPassName(OptimizationPass pass) {
return BoundsCheckElimination::kBoundsCheckEliminationPassName;
case OptimizationPass::kLoadStoreElimination:
return LoadStoreElimination::kLoadStoreEliminationPassName;
+ case OptimizationPass::kAggressiveConstantFolding:
case OptimizationPass::kConstantFolding:
return HConstantFolding::kConstantFoldingPassName;
case OptimizationPass::kDeadCodeElimination:
@@ -95,6 +97,8 @@ const char* OptimizationPassName(OptimizationPass pass) {
return ConstructorFenceRedundancyElimination::kCFREPassName;
case OptimizationPass::kScheduling:
return HInstructionScheduling::kInstructionSchedulingPassName;
+ case OptimizationPass::kWriteBarrierElimination:
+ return WriteBarrierElimination::kWBEPassName;
#ifdef ART_ENABLE_CODEGEN_arm
case OptimizationPass::kInstructionSimplifierArm:
return arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName;
@@ -194,7 +198,8 @@ ArenaVector<HOptimization*> ConstructOptimizations(
opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name);
break;
case OptimizationPass::kInductionVarAnalysis:
- opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name);
+ opt = most_recent_induction =
+ new (allocator) HInductionVarAnalysis(graph, stats, pass_name);
break;
//
// Passes that need prior analysis.
@@ -221,7 +226,11 @@ ArenaVector<HOptimization*> ConstructOptimizations(
// Regular passes.
//
case OptimizationPass::kConstantFolding:
- opt = new (allocator) HConstantFolding(graph, pass_name);
+ opt = new (allocator) HConstantFolding(graph, stats, pass_name);
+ break;
+ case OptimizationPass::kAggressiveConstantFolding:
+ opt = new (allocator)
+ HConstantFolding(graph, stats, pass_name, /* use_all_optimizations_ = */ true);
break;
case OptimizationPass::kDeadCodeElimination:
opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name);
@@ -239,6 +248,7 @@ ArenaVector<HOptimization*> ConstructOptimizations(
/* total_number_of_instructions= */ 0,
/* parent= */ nullptr,
/* depth= */ 0,
+ /* try_catch_inlining_allowed= */ true,
pass_name);
break;
}
@@ -267,6 +277,9 @@ ArenaVector<HOptimization*> ConstructOptimizations(
case OptimizationPass::kLoadStoreElimination:
opt = new (allocator) LoadStoreElimination(graph, stats, pass_name);
break;
+ case OptimizationPass::kWriteBarrierElimination:
+ opt = new (allocator) WriteBarrierElimination(graph, stats, pass_name);
+ break;
case OptimizationPass::kScheduling:
opt = new (allocator) HInstructionScheduling(
graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name);
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2113df0c81..134e3cdc7a 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -18,10 +18,11 @@
#define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
#include "base/arena_object.h"
+#include "base/macros.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
@@ -42,7 +43,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
// Return the name of the pass. Pass names for a single HOptimization should be of form
// <optimization_name> or <optimization_name>$<pass_name> for common <optimization_name> prefix.
- // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce',
+ // Example: 'instruction_simplifier', 'instruction_simplifier$after_gvn',
// 'instruction_simplifier$before_codegen'.
const char* GetPassName() const { return pass_name_; }
@@ -66,6 +67,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
// field is preferred over a string lookup at places where performance matters.
// TODO: generate this table and lookup methods below automatically?
enum class OptimizationPass {
+ kAggressiveConstantFolding,
kAggressiveInstructionSimplifier,
kBoundsCheckElimination,
kCHAGuardOptimization,
@@ -83,6 +85,7 @@ enum class OptimizationPass {
kScheduling,
kSelectGenerator,
kSideEffectsAnalysis,
+ kWriteBarrierElimination,
#ifdef ART_ENABLE_CODEGEN_arm
kInstructionSimplifierArm,
kCriticalNativeAbiFixupArm,
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index bad540e03c..f12e748941 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -18,6 +18,7 @@
#include <vector>
#include "arch/instruction_set.h"
+#include "base/macros.h"
#include "base/runtime_debug.h"
#include "cfi_test.h"
#include "driver/compiler_options.h"
@@ -32,7 +33,7 @@
namespace vixl32 = vixl::aarch32;
-namespace art {
+namespace art HIDDEN {
// Run the tests only on host.
#ifndef ART_TARGET_ANDROID
@@ -167,9 +168,20 @@ TEST_ISA(kThumb2)
// barrier configuration, and as such is removed from the set of
// callee-save registers in the ARM64 code generator of the Optimizing
// compiler.
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
-TEST_ISA(kArm64)
-#endif
+//
+// We can't use compile-time macros for read-barrier as the introduction
+// of userfaultfd-GC has made it a runtime choice.
+TEST_F(OptimizingCFITest, kArm64) {
+ if (kUseBakerReadBarrier && gUseReadBarrier) {
+ std::vector<uint8_t> expected_asm(
+ expected_asm_kArm64,
+ expected_asm_kArm64 + arraysize(expected_asm_kArm64));
+ std::vector<uint8_t> expected_cfi(
+ expected_cfi_kArm64,
+ expected_cfi_kArm64 + arraysize(expected_cfi_kArm64));
+ TestImpl(InstructionSet::kArm64, "kArm64", expected_asm, expected_cfi);
+ }
+}
#endif
#ifdef ART_ENABLE_CODEGEN_x86
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6eb3d01e42..00eb6e5c42 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -33,12 +33,11 @@
#include "base/timing_logger.h"
#include "builder.h"
#include "code_generator.h"
-#include "compiled_method.h"
#include "compiler.h"
#include "debug/elf_debug_writer.h"
#include "debug/method_debug_info.h"
#include "dex/dex_file_types.h"
-#include "driver/compiled_method_storage.h"
+#include "driver/compiled_code_storage.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "graph_checker.h"
@@ -52,6 +51,7 @@
#include "linker/linker_patch.h"
#include "nodes.h"
#include "oat_quick_method_header.h"
+#include "optimizing/write_barrier_elimination.h"
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
@@ -62,7 +62,7 @@
#include "stack_map_stream.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
@@ -269,7 +269,7 @@ class PassScope : public ValueObject {
class OptimizingCompiler final : public Compiler {
public:
explicit OptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage);
+ CompiledCodeStorage* storage);
~OptimizingCompiler() override;
bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override;
@@ -359,11 +359,11 @@ class OptimizingCompiler final : public Compiler {
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer) const;
- private:
// Create a 'CompiledMethod' for an optimized graph.
CompiledMethod* Emit(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
+ bool is_intrinsic,
const dex::CodeItem* item) const;
// Try compiling a method and return the code generator used for
@@ -413,7 +413,7 @@ class OptimizingCompiler final : public Compiler {
static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage)
+ CompiledCodeStorage* storage)
: Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning) {
// Enable C1visualizer output.
const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName();
@@ -568,6 +568,9 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
}
#endif
default:
+ UNUSED(graph);
+ UNUSED(dex_compilation_unit);
+ UNUSED(pass_observer);
return false;
}
}
@@ -653,7 +656,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
OptDef(OptimizationPass::kGlobalValueNumbering),
// Simplification (TODO: only if GVN occurred).
OptDef(OptimizationPass::kSelectGenerator),
- OptDef(OptimizationPass::kConstantFolding,
+ OptDef(OptimizationPass::kAggressiveConstantFolding,
"constant_folding$after_gvn"),
OptDef(OptimizationPass::kInstructionSimplifier,
"instruction_simplifier$after_gvn"),
@@ -668,20 +671,27 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
OptDef(OptimizationPass::kLoopOptimization),
// Simplification.
OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$after_bce"),
+ "constant_folding$after_loop_opt"),
OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
- "instruction_simplifier$after_bce"),
+ "instruction_simplifier$after_loop_opt"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_loop_opt"),
// Other high-level optimizations.
OptDef(OptimizationPass::kLoadStoreElimination),
OptDef(OptimizationPass::kCHAGuardOptimization),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$final"),
OptDef(OptimizationPass::kCodeSinking),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$before_codegen"),
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
"instruction_simplifier$before_codegen"),
+ // Simplification may result in dead code that should be removed prior to
+ // code generation.
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$before_codegen"),
// Eliminate constructor fences after code sinking to avoid
// complicated sinking logic to split a fence with many inputs.
OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
@@ -711,18 +721,19 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator*
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
+ bool is_intrinsic,
const dex::CodeItem* code_item_for_osr_check) const {
ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check);
- CompiledMethodStorage* storage = GetCompiledMethodStorage();
- CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
- storage,
+ CompiledCodeStorage* storage = GetCompiledCodeStorage();
+ CompiledMethod* compiled_method = storage->CreateCompiledMethod(
codegen->GetInstructionSet(),
code_allocator->GetMemory(),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
- ArrayRef<const linker::LinkerPatch>(linker_patches));
+ ArrayRef<const linker::LinkerPatch>(linker_patches),
+ is_intrinsic);
for (const linker::LinkerPatch& patch : linker_patches) {
if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) {
@@ -891,6 +902,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
} else {
RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
+ PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer);
+ WriteBarrierElimination(graph, compilation_stats_.get()).Run();
}
RegisterAllocator::Strategy regalloc_strategy =
@@ -984,6 +997,10 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
optimizations);
RunArchOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
+ {
+ PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer);
+ WriteBarrierElimination(graph, compilation_stats_.get()).Run();
+ }
AllocateRegisters(graph,
codegen.get(),
@@ -1079,10 +1096,8 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
compiled_method = Emit(&allocator,
&code_allocator,
codegen.get(),
+ compiled_intrinsic,
compiled_intrinsic ? nullptr : code_item);
- if (compiled_intrinsic) {
- compiled_method->MarkAsIntrinsic();
- }
if (kArenaAllocatorCountAllocations) {
codegen.reset(); // Release codegen's ScopedArenaAllocator for memory accounting.
@@ -1115,17 +1130,18 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator,
const JniCompiledMethod& jni_compiled_method,
- size_t code_size) {
+ size_t code_size,
+ bool debuggable) {
// StackMapStream is quite large, so allocate it using the ScopedArenaAllocator
// to stay clear of the frame size limit.
std::unique_ptr<StackMapStream> stack_map_stream(
new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet()));
- stack_map_stream->BeginMethod(
- jni_compiled_method.GetFrameSize(),
- jni_compiled_method.GetCoreSpillMask(),
- jni_compiled_method.GetFpSpillMask(),
- /* num_dex_registers= */ 0,
- /* baseline= */ false);
+ stack_map_stream->BeginMethod(jni_compiled_method.GetFrameSize(),
+ jni_compiled_method.GetCoreSpillMask(),
+ jni_compiled_method.GetFpSpillMask(),
+ /* num_dex_registers= */ 0,
+ /* baseline= */ false,
+ debuggable);
stack_map_stream->EndMethod(code_size);
return stack_map_stream->Encode();
}
@@ -1172,12 +1188,11 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
method,
&handles));
if (codegen != nullptr) {
- CompiledMethod* compiled_method = Emit(&allocator,
- &code_allocator,
- codegen.get(),
- /* item= */ nullptr);
- compiled_method->MarkAsIntrinsic();
- return compiled_method;
+ return Emit(&allocator,
+ &code_allocator,
+ codegen.get(),
+ /*is_intrinsic=*/ true,
+ /*item=*/ nullptr);
}
}
}
@@ -1187,19 +1202,22 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub);
ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map.
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(
- &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size());
- return CompiledMethod::SwapAllocCompiledMethod(
- GetCompiledMethodStorage(),
+ ScopedArenaVector<uint8_t> stack_map =
+ CreateJniStackMap(&stack_map_allocator,
+ jni_compiled_method,
+ jni_compiled_method.GetCode().size(),
+ compiler_options.GetDebuggable() && compiler_options.IsJitCompiler());
+ return GetCompiledCodeStorage()->CreateCompiledMethod(
jni_compiled_method.GetInstructionSet(),
jni_compiled_method.GetCode(),
ArrayRef<const uint8_t>(stack_map),
jni_compiled_method.GetCfi(),
- /* patches= */ ArrayRef<const linker::LinkerPatch>());
+ /*patches=*/ ArrayRef<const linker::LinkerPatch>(),
+ /*is_intrinsic=*/ false);
}
Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage) {
+ CompiledCodeStorage* storage) {
return new OptimizingCompiler(compiler_options, storage);
}
@@ -1233,6 +1251,19 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArenaAllocator allocator(runtime->GetJitArenaPool());
if (UNLIKELY(method->IsNative())) {
+ // Use GenericJniTrampoline for critical native methods in debuggable runtimes. We don't
+ // support calling method entry / exit hooks for critical native methods yet.
+ // TODO(mythria): Add support for calling method entry / exit hooks in JITed stubs for critical
+ // native methods too.
+ if (compiler_options.GetDebuggable() && method->IsCriticalNative()) {
+ DCHECK(compiler_options.IsJitCompiler());
+ return false;
+ }
+ // Java debuggable runtimes should set compiler options to debuggable, so that we either
+ // generate method entry / exit hooks or skip JITing. For critical native methods we don't
+ // generate method entry / exit hooks so we shouldn't JIT them in debuggable runtimes.
+ DCHECK_IMPLIES(method->IsCriticalNative(), !runtime->IsJavaDebuggable());
+
JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
compiler_options, access_flags, method_idx, *dex_file, &allocator);
std::vector<Handle<mirror::Object>> roots;
@@ -1241,8 +1272,11 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArenaStack arena_stack(runtime->GetJitArenaPool());
// StackMapStream is large and it does not fit into this frame, so we need helper method.
ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map.
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(
- &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size());
+ ScopedArenaVector<uint8_t> stack_map =
+ CreateJniStackMap(&stack_map_allocator,
+ jni_compiled_method,
+ jni_compiled_method.GetCode().size(),
+ compiler_options.GetDebuggable() && compiler_options.IsJitCompiler());
ArrayRef<const uint8_t> reserved_code;
ArrayRef<const uint8_t> reserved_data;
diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h
index cd6d684590..737ffd034a 100644
--- a/compiler/optimizing/optimizing_compiler.h
+++ b/compiler/optimizing/optimizing_compiler.h
@@ -18,18 +18,19 @@
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
#include "base/globals.h"
+#include "base/macros.h"
#include "base/mutex.h"
-namespace art {
+namespace art HIDDEN {
class ArtMethod;
+class CompiledCodeStorage;
class Compiler;
-class CompiledMethodStorage;
class CompilerOptions;
class DexFile;
Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage);
+ CompiledCodeStorage* storage);
bool EncodeArtMethodInInlineInfo(ArtMethod* method);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index d458e42608..a1d0a5a845 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -26,8 +26,9 @@
#include "base/atomic.h"
#include "base/globals.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
enum class MethodCompilationStat {
kAttemptBytecodeCompilation = 0,
@@ -46,6 +47,7 @@ enum class MethodCompilationStat {
kUnresolvedFieldNotAFastAccess,
kRemovedCheckedCast,
kRemovedDeadInstruction,
+ kRemovedTry,
kRemovedNullCheck,
kNotCompiledSkipped,
kNotCompiledInvalidBytecode,
@@ -59,6 +61,7 @@ enum class MethodCompilationStat {
kNotCompiledSpaceFilter,
kNotCompiledUnhandledInstruction,
kNotCompiledUnsupportedIsa,
+ kNotCompiledInliningIrreducibleLoop,
kNotCompiledIrreducibleLoopAndStringInit,
kNotCompiledPhiEquivalentInOsr,
kInlinedMonomorphicCall,
@@ -73,11 +76,13 @@ enum class MethodCompilationStat {
kLoopVectorizedIdiom,
kSelectGenerated,
kRemovedInstanceOf,
+ kPropagatedIfValue,
kInlinedInvokeVirtualOrInterface,
kInlinedLastInvokeVirtualOrInterface,
kImplicitNullCheckGenerated,
kExplicitNullCheckGenerated,
kSimplifyIf,
+ kSimplifyIfAddedPhi,
kSimplifyThrowingInvoke,
kInstructionSunk,
kNotInlinedUnresolvedEntrypoint,
@@ -88,16 +93,19 @@ enum class MethodCompilationStat {
kNotInlinedEnvironmentBudget,
kNotInlinedInstructionBudget,
kNotInlinedLoopWithoutExit,
- kNotInlinedIrreducibleLoop,
+ kNotInlinedIrreducibleLoopCallee,
+ kNotInlinedIrreducibleLoopCaller,
kNotInlinedAlwaysThrows,
kNotInlinedInfiniteLoop,
- kNotInlinedTryCatchCaller,
kNotInlinedTryCatchCallee,
+ kNotInlinedTryCatchDisabled,
kNotInlinedRegisterAllocator,
kNotInlinedCannotBuild,
+ kNotInlinedNeverInlineAnnotation,
kNotInlinedNotCompilable,
kNotInlinedNotVerified,
kNotInlinedCodeItem,
+ kNotInlinedEndsWithThrow,
kNotInlinedWont,
kNotInlinedRecursiveBudget,
kNotInlinedPolymorphicRecursiveBudget,
@@ -105,12 +113,15 @@ enum class MethodCompilationStat {
kNotInlinedUnresolved,
kNotInlinedPolymorphic,
kNotInlinedCustom,
+ kNotVarAnalyzedPathological,
kTryInline,
kConstructorFenceGeneratedNew,
kConstructorFenceGeneratedFinal,
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
kConstructorFenceRemovedCFRE,
+ kPossibleWriteBarrier,
+ kRemovedWriteBarrier,
kBitstringTypeCheck,
kJitOutOfMemoryForCommit,
kFullLSEAllocationRemoved,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index e83688039a..2e05c41f01 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -25,6 +25,7 @@
#include <vector>
#include <variant>
+#include "base/macros.h"
#include "base/indenter.h"
#include "base/malloc_arena_pool.h"
#include "base/scoped_arena_allocator.h"
@@ -46,7 +47,7 @@
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
#define NUM_INSTRUCTIONS(...) \
(sizeof((uint16_t[]) {__VA_ARGS__}) /sizeof(uint16_t))
@@ -240,13 +241,14 @@ class OptimizingUnitTestHelper {
// Create the dex file based on the fake data. Call the constructor so that we can use virtual
// functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks.
- dex_files_.emplace_back(new StandardDexFile(
- dex_data,
- sizeof(StandardDexFile::Header),
- "no_location",
- /*location_checksum*/ 0,
- /*oat_dex_file*/ nullptr,
- /*container*/ nullptr));
+ auto container =
+ std::make_shared<MemoryDexFileContainer>(dex_data, sizeof(StandardDexFile::Header));
+ dex_files_.emplace_back(new StandardDexFile(dex_data,
+ sizeof(StandardDexFile::Header),
+ "no_location",
+ /*location_checksum*/ 0,
+ /*oat_dex_file*/ nullptr,
+ std::move(container)));
graph_ = new (allocator) HGraph(
allocator,
@@ -260,9 +262,10 @@ class OptimizingUnitTestHelper {
// Create a control-flow graph from Dex instructions.
HGraph* CreateCFG(const std::vector<uint16_t>& data,
- DataType::Type return_type = DataType::Type::kInt32,
- VariableSizedHandleScope* handles = nullptr) {
- HGraph* graph = CreateGraph(handles);
+ DataType::Type return_type = DataType::Type::kInt32) {
+ ScopedObjectAccess soa(Thread::Current());
+ VariableSizedHandleScope handles(soa.Self());
+ HGraph* graph = CreateGraph(&handles);
// The code item data might not be aligned to 4 bytes, copy it to ensure that.
const size_t code_item_size = data.size() * sizeof(data.front());
@@ -278,7 +281,7 @@ class OptimizingUnitTestHelper {
/* class_linker= */ nullptr,
graph->GetDexFile(),
code_item,
- /* class_def_index= */ DexFile::kDexNoIndex16,
+ /* class_def_idx= */ DexFile::kDexNoIndex16,
/* method_idx= */ dex::kDexNoIndex,
/* access_flags= */ 0u,
/* verified_method= */ nullptr,
@@ -320,25 +323,10 @@ class OptimizingUnitTestHelper {
// Run GraphChecker with all checks.
//
// Return: the status whether the run is successful.
- bool CheckGraph(HGraph* graph, std::ostream& oss = std::cerr) {
- return CheckGraph(graph, /*check_ref_type_info=*/true, oss);
- }
-
bool CheckGraph(std::ostream& oss = std::cerr) {
return CheckGraph(graph_, oss);
}
- // Run GraphChecker with all checks except reference type information checks.
- //
- // Return: the status whether the run is successful.
- bool CheckGraphSkipRefTypeInfoChecks(HGraph* graph, std::ostream& oss = std::cerr) {
- return CheckGraph(graph, /*check_ref_type_info=*/false, oss);
- }
-
- bool CheckGraphSkipRefTypeInfoChecks(std::ostream& oss = std::cerr) {
- return CheckGraphSkipRefTypeInfoChecks(graph_, oss);
- }
-
HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
ArenaVector<HInstruction*>* current_locals) {
HEnvironment* environment = new (GetAllocator()) HEnvironment(
@@ -473,7 +461,8 @@ class OptimizingUnitTestHelper {
HInvokeStaticOrDirect::DispatchInfo{},
InvokeType::kStatic,
/* resolved_method_reference= */ method_reference,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
for (auto [ins, idx] : ZipCount(MakeIterationRange(args))) {
res->SetRawInputAt(idx, ins);
}
@@ -531,9 +520,8 @@ class OptimizingUnitTestHelper {
}
protected:
- bool CheckGraph(HGraph* graph, bool check_ref_type_info, std::ostream& oss) {
+ bool CheckGraph(HGraph* graph, std::ostream& oss) {
GraphChecker checker(graph);
- checker.SetRefTypeInfoCheckEnabled(check_ref_type_info);
checker.Run();
checker.Dump(oss);
return checker.IsValid();
@@ -559,7 +547,7 @@ class OptimizingUnitTestHelper {
class OptimizingUnitTest : public CommonArtTest, public OptimizingUnitTestHelper {};
// Naive string diff data type.
-typedef std::list<std::pair<std::string, std::string>> diff_t;
+using diff_t = std::list<std::pair<std::string, std::string>>;
// An alias for the empty string used to make it clear that a line is
// removed in a diff.
@@ -586,7 +574,7 @@ inline std::ostream& operator<<(std::ostream& oss, const AdjacencyListGraph& alg
return alg.Dump(oss);
}
-class PatternMatchGraphVisitor : public HGraphVisitor {
+class PatternMatchGraphVisitor final : public HGraphVisitor {
private:
struct HandlerWrapper {
public:
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 2036b4a370..9fc4cc86bf 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -19,7 +19,7 @@
#include "base/stl_util.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
// Perform a linear sweep of the moves to add them to the initial list of
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 5fadcab402..17d5122542 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -18,11 +18,12 @@
#define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "base/value_object.h"
#include "data_type.h"
#include "locations.h"
-namespace art {
+namespace art HIDDEN {
class HParallelMove;
class MoveOperands;
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index a8ab6cdd0c..a1c05e9cad 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
@@ -22,7 +23,7 @@
#include "gtest/gtest-typed-test.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
constexpr int kScratchRegisterStartIndexForTest = 100;
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 17f37f05c5..d3da3d3ce1 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -18,13 +18,13 @@
#include "code_generator_x86.h"
#include "intrinsics_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
/**
* Finds instructions that need the constant area base as an input.
*/
-class PCRelativeHandlerVisitor : public HGraphVisitor {
+class PCRelativeHandlerVisitor final : public HGraphVisitor {
public:
PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
: HGraphVisitor(graph),
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 3b470a6502..45578d8050 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index c2f3d0e741..398b10abf3 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -22,7 +22,7 @@
#include "optimizing_compiler_stats.h"
#include "well_known_classes.h"
-namespace art {
+namespace art HIDDEN {
void PrepareForRegisterAllocation::Run() {
// Order does not matter.
@@ -83,7 +83,7 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
if (check->IsStringCharAt()) {
// Add a fake environment for String.charAt() inline info as we want the exception
// to appear as being thrown from there. Skip if we're compiling String.charAt() itself.
- ArtMethod* char_at_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt);
+ ArtMethod* char_at_method = WellKnownClasses::java_lang_String_charAt;
if (GetGraph()->GetArtMethod() != char_at_method) {
ArenaAllocator* allocator = GetGraph()->GetAllocator();
HEnvironment* environment = new (allocator) HEnvironment(allocator,
@@ -109,7 +109,7 @@ void PrepareForRegisterAllocation::VisitArraySet(HArraySet* instruction) {
if (value->IsNullConstant()) {
DCHECK_EQ(value->GetType(), DataType::Type::kReference);
if (instruction->NeedsTypeCheck()) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
}
}
}
@@ -295,15 +295,16 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input,
return false;
}
- // In debug mode, check that we have not inserted a throwing instruction
- // or an instruction with side effects between input and user.
- if (kIsDebugBuild) {
- for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
- CHECK(between != nullptr); // User must be after input in the same block.
- CHECK(!between->CanThrow()) << *between << " User: " << *user;
- CHECK(!between->HasSideEffects()) << *between << " User: " << *user;
+  // If there's an instruction between them that can throw or has side effects, we cannot move
+  // the clinit check responsibility.
+ for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+ DCHECK(between != nullptr) << " User must be after input in the same block. input: " << *input
+ << ", user: " << *user;
+ if (between->CanThrow() || between->HasSideEffects()) {
+ return false;
}
}
+
return true;
}
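
The hunk above turns what used to be a debug-only consistency CHECK in CanMoveClinitCheck into a real bail-out: an intervening instruction that can throw or has side effects now makes the move illegal in every build flavor instead of aborting debug builds. A standalone model of that scan, assuming nothing from ART (the struct and helper below are stand-ins, not HInstruction):

    // Stand-in types; only the scan-and-veto shape mirrors the new code.
    struct Instruction {
      Instruction* next = nullptr;
      bool can_throw = false;
      bool has_side_effects = false;
    };

    // Returns false if any instruction strictly between `input` and `user` can throw or has
    // side effects. (The real code keeps a DCHECK that `user` follows `input` in the block.)
    bool CanMoveCheckToUser(const Instruction* input, const Instruction* user) {
      for (const Instruction* between = input->next; between != user; between = between->next) {
        if (between == nullptr || between->can_throw || between->has_side_effects) {
          return false;
        }
      }
      return true;
    }
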
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index e0bb76eb22..0426f8470b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
#define ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CompilerOptions;
class OptimizingCompilerStats;
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 8ef9ce4e8b..77ddb97707 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -19,9 +19,10 @@
#include "android-base/stringprintf.h"
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class HPrettyPrinter : public HGraphVisitor {
public:
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 6ef386b4a5..90d5f8f08f 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -17,6 +17,7 @@
#include "pretty_printer.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
@@ -25,9 +26,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class PrettyPrinterTest : public OptimizingUnitTest {
+class PrettyPrinterTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index e6024b08cb..91bae5f49b 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -29,7 +29,7 @@
#include "mirror/dex_cache.h"
#include "scoped_thread_state_change-inl.h"
-namespace art {
+namespace art HIDDEN {
static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
Thread* self, const DexFile& dex_file, Handle<mirror::DexCache> hint_dex_cache)
@@ -41,18 +41,14 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
}
}
-class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
+class ReferenceTypePropagation::RTPVisitor final : public HGraphDelegateVisitor {
public:
- RTPVisitor(HGraph* graph,
- Handle<mirror::ClassLoader> class_loader,
- Handle<mirror::DexCache> hint_dex_cache,
- bool is_first_run)
- : HGraphDelegateVisitor(graph),
- class_loader_(class_loader),
- hint_dex_cache_(hint_dex_cache),
- allocator_(graph->GetArenaStack()),
- worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)),
- is_first_run_(is_first_run) {
+ RTPVisitor(HGraph* graph, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run)
+ : HGraphDelegateVisitor(graph),
+ hint_dex_cache_(hint_dex_cache),
+ allocator_(graph->GetArenaStack()),
+ worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)),
+ is_first_run_(is_first_run) {
worklist_.reserve(kDefaultWorklistSize);
}
@@ -110,7 +106,6 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
static constexpr size_t kDefaultWorklistSize = 8;
- Handle<mirror::ClassLoader> class_loader_;
Handle<mirror::DexCache> hint_dex_cache_;
// Use local allocator for allocating memory.
@@ -122,63 +117,18 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
};
ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
- Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
bool is_first_run,
const char* name)
- : HOptimization(graph, name),
- class_loader_(class_loader),
- hint_dex_cache_(hint_dex_cache),
- is_first_run_(is_first_run) {
-}
-
-void ReferenceTypePropagation::ValidateTypes() {
- // TODO: move this to the graph checker. Note: There may be no Thread for gtests.
- if (kIsDebugBuild && Thread::Current() != nullptr) {
- ScopedObjectAccess soa(Thread::Current());
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) {
- HInstruction* instr = iti.Current();
- if (instr->GetType() == DataType::Type::kReference) {
- DCHECK(instr->GetReferenceTypeInfo().IsValid())
- << "Invalid RTI for instruction: " << instr->DebugName();
- if (instr->IsBoundType()) {
- DCHECK(instr->AsBoundType()->GetUpperBound().IsValid());
- } else if (instr->IsLoadClass()) {
- HLoadClass* cls = instr->AsLoadClass();
- DCHECK(cls->GetReferenceTypeInfo().IsExact());
- DCHECK_IMPLIES(cls->GetLoadedClassRTI().IsValid(), cls->GetLoadedClassRTI().IsExact());
- } else if (instr->IsNullCheck()) {
- DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo()))
- << "NullCheck " << instr->GetReferenceTypeInfo()
- << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
- }
- } else if (instr->IsInstanceOf()) {
- HInstanceOf* iof = instr->AsInstanceOf();
- DCHECK_IMPLIES(iof->GetTargetClassRTI().IsValid(), iof->GetTargetClassRTI().IsExact());
- } else if (instr->IsCheckCast()) {
- HCheckCast* check = instr->AsCheckCast();
- DCHECK_IMPLIES(check->GetTargetClassRTI().IsValid(),
- check->GetTargetClassRTI().IsExact());
- }
- }
- }
- }
-}
+ : HOptimization(graph, name), hint_dex_cache_(hint_dex_cache), is_first_run_(is_first_run) {}
void ReferenceTypePropagation::Visit(HInstruction* instruction) {
- RTPVisitor visitor(graph_,
- class_loader_,
- hint_dex_cache_,
- is_first_run_);
+ RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_);
instruction->Accept(&visitor);
}
void ReferenceTypePropagation::Visit(ArrayRef<HInstruction* const> instructions) {
- RTPVisitor visitor(graph_,
- class_loader_,
- hint_dex_cache_,
- is_first_run_);
+ RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_);
for (HInstruction* instruction : instructions) {
if (instruction->IsPhi()) {
// Need to force phis to recalculate null-ness.
@@ -349,7 +299,10 @@ static void BoundTypeForClassCheck(HInstruction* check) {
}
bool ReferenceTypePropagation::Run() {
- RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, is_first_run_);
+ DCHECK(Thread::Current() != nullptr)
+ << "ReferenceTypePropagation requires the use of Thread::Current(). Make sure you have a "
+ << "Runtime initialized before calling this optimization pass";
+ RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_);
// To properly propagate type info we need to visit in the dominator-based order.
// Reverse post order guarantees a node's dominators are visited first.
@@ -359,7 +312,6 @@ bool ReferenceTypePropagation::Run() {
}
visitor.ProcessWorklist();
- ValidateTypes();
return true;
}
@@ -446,10 +398,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction,
if (rhs->AsIntConstant()->IsTrue()) {
// Case (1a)
*trueBranch = ifInstruction->IfTrueSuccessor();
- } else {
+ } else if (rhs->AsIntConstant()->IsFalse()) {
// Case (2a)
- DCHECK(rhs->AsIntConstant()->IsFalse()) << rhs->AsIntConstant()->GetValue();
*trueBranch = ifInstruction->IfFalseSuccessor();
+ } else {
+      // Sometimes we see a comparison of instance-of with a constant that is neither 0 nor 1.
+      // In those cases, we cannot match the if+instance-of pattern.
+ return false;
}
*instanceOf = lhs->AsInstanceOf();
return true;
@@ -463,10 +418,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction,
if (rhs->AsIntConstant()->IsFalse()) {
// Case (1b)
*trueBranch = ifInstruction->IfTrueSuccessor();
- } else {
+ } else if (rhs->AsIntConstant()->IsTrue()) {
// Case (2b)
- DCHECK(rhs->AsIntConstant()->IsTrue()) << rhs->AsIntConstant()->GetValue();
*trueBranch = ifInstruction->IfFalseSuccessor();
+ } else {
+      // Sometimes we see a comparison of instance-of with a constant that is neither 0 nor 1.
+      // In those cases, we cannot match the if+instance-of pattern.
+ return false;
}
*instanceOf = lhs->AsInstanceOf();
return true;
@@ -583,7 +541,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction*
ScopedObjectAccess soa(Thread::Current());
ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
- type_idx, dex_cache, class_loader_.Get());
+ type_idx, dex_cache, dex_cache->GetClassLoader());
SetClassAsTypeInfo(instr, klass, is_exact);
}
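
The two relaxed hunks in MatchIfInstanceOf above stop assuming that the constant compared against an instance-of result is always 0 or 1; any other constant now simply fails the match rather than tripping a DCHECK. A standalone model of the equality case, i.e. cases (1a)/(2a) in the comments (the enum and helper names are illustrative, not ART code):

    #include <optional>

    enum class Successor { kIfTrue, kIfFalse };

    // For `if ((x instanceof T) == c)`: which If successor is taken when the instance-of
    // holds. Constants other than 0 and 1 no longer match the pattern.
    std::optional<Successor> InstanceOfTrueSuccessor(int c) {
      if (c == 1) {
        return Successor::kIfTrue;   // case (1a)
      } else if (c == 0) {
        return Successor::kIfFalse;  // case (2a)
      }
      return std::nullopt;           // neither 0 nor 1: bail out of the match
    }
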
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 889a8465e0..655f62b3da 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -18,12 +18,13 @@
#define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "mirror/class-inl.h"
#include "nodes.h"
#include "obj_ptr.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Propagates reference types to instructions.
@@ -31,7 +32,6 @@ namespace art {
class ReferenceTypePropagation : public HOptimization {
public:
ReferenceTypePropagation(HGraph* graph,
- Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
bool is_first_run,
const char* name = kReferenceTypePropagationPassName);
@@ -71,10 +71,6 @@ class ReferenceTypePropagation : public HOptimization {
HandleCache* handle_cache)
REQUIRES_SHARED(Locks::mutator_lock_);
- void ValidateTypes();
-
- Handle<mirror::ClassLoader> class_loader_;
-
// Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with
// graph_->GetDexFile(). Since we may look up also in other dex files, it's used only
// as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache().
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index d1bcab083c..2b012fcd67 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -19,6 +19,7 @@
#include <random>
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "base/transform_array_ref.h"
#include "base/transform_iterator.h"
#include "builder.h"
@@ -26,7 +27,7 @@
#include "object_lock.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
// TODO It would be good to use the following but there is a miniscule amount of
// chance for flakiness so we'll just use a set seed instead.
@@ -47,11 +48,8 @@ class ReferenceTypePropagationTestBase : public SuperTest, public OptimizingUnit
void SetupPropagation(VariableSizedHandleScope* handles) {
graph_ = CreateGraph(handles);
- propagation_ = new (GetAllocator()) ReferenceTypePropagation(graph_,
- Handle<mirror::ClassLoader>(),
- Handle<mirror::DexCache>(),
- true,
- "test_prop");
+ propagation_ = new (GetAllocator())
+ ReferenceTypePropagation(graph_, Handle<mirror::DexCache>(), true, "test_prop");
}
// Relay method to merge type in reference type propagation.
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 875c633889..53e11f2c3d 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -21,7 +21,7 @@
#include "linear_order.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
RegisterAllocationResolver::RegisterAllocationResolver(CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
index 278371777d..f4782eb48e 100644
--- a/compiler/optimizing/register_allocation_resolver.h
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -18,10 +18,11 @@
#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
#include "base/array_ref.h"
+#include "base/macros.h"
#include "base/value_object.h"
#include "data_type.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class CodeGenerator;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a9c217fc4f..e4c2d74908 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -27,7 +27,7 @@
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 4d226875bf..453e339cba 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -22,7 +22,7 @@
#include "base/arena_object.h"
#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class HBasicBlock;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 684aaf5750..a7c891d4e7 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -22,7 +22,7 @@
#include "ssa_liveness_analysis.h"
#include "thread-current-inl.h"
-namespace art {
+namespace art HIDDEN {
// Highest number of registers that we support for any platform. This can be used for std::bitset,
// for example, which needs to know its size at compile time.
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
index e5b86eacee..0e10152049 100644
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -24,7 +24,7 @@
#include "base/scoped_arena_containers.h"
#include "register_allocator.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class HBasicBlock;
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 833c24d5bb..fcdaa2d34f 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -26,7 +26,7 @@
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kMaxLifetimePosition = -1;
static constexpr size_t kDefaultNumberOfSpillSlots = 4;
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index 9a1e0d7f10..c71a9e9ff1 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -22,7 +22,7 @@
#include "base/scoped_arena_containers.h"
#include "register_allocator.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class HBasicBlock;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 682315545d..d316aa5dc2 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -18,6 +18,7 @@
#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -31,17 +32,17 @@
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
-namespace art {
+namespace art HIDDEN {
using Strategy = RegisterAllocator::Strategy;
// Note: the register allocator tests rely on the fact that constants have live
// intervals and registers get allocated to them.
-class RegisterAllocatorTest : public OptimizingUnitTest {
+class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void SetUp() override {
- OptimizingUnitTest::SetUp();
+ CommonCompilerTest::SetUp();
// This test is using the x86 ISA.
compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kX86, "default");
}
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 8f18ccff5f..116f52605e 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -32,7 +32,7 @@
#include "scheduler_arm.h"
#endif
-namespace art {
+namespace art HIDDEN {
void SchedulingGraph::AddDependency(SchedulingNode* node,
SchedulingNode* dependency,
@@ -718,9 +718,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
// HLoadException
// HMemoryBarrier
// HMonitorOperation
- // HNativeDebugInfo
+ // HNop
// HThrow
// HTryBoundary
+  // All volatile field accesses, e.g. HInstanceFieldGet
// TODO: Some of the instructions above may be safe to schedule (maybe as
// scheduling barriers).
return instruction->IsArrayGet() ||
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index f7180a02d7..299fbc93f3 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -19,6 +19,7 @@
#include <fstream>
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
@@ -28,7 +29,7 @@
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
// General description of instruction scheduling.
//
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 965e1bd9f4..3f931c4c49 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -23,7 +23,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
using helpers::Int32ConstantFrom;
@@ -669,7 +669,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
}
case DataType::Type::kReference: {
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
} else {
if (index->IsConstant()) {
@@ -937,7 +937,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct
break;
case DataType::Type::kReference:
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index d11222d9f4..0da21c187f 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -17,14 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#include "base/macros.h"
#include "code_generator_arm_vixl.h"
#include "scheduler.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
-// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
-typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
-
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
@@ -49,10 +47,10 @@ static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
-class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
public:
explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
- : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+ : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
// Default visitor for instructions not handled specifically below.
void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
@@ -133,7 +131,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
// The latency setting for each HInstruction depends on how CodeGenerator may generate code,
// latency visitors may query CodeGenerator for such information for accurate latency settings.
- CodeGeneratorARMType* codegen_;
+ CodeGeneratorARMVIXL* codegen_;
};
class HSchedulerARM : public HScheduler {
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 4f504c2100..3071afd951 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -20,7 +20,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) {
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index ba5a743545..ec41577e9d 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
+#include "base/macros.h"
#include "scheduler.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
@@ -55,7 +56,7 @@ static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
-class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
+class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
public:
// Default visitor for instructions not handled specifically below.
void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index a1cc202a89..165bfe3d94 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -17,6 +17,7 @@
#include "scheduler.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "codegen_test_utils.h"
#include "common_compiler_test.h"
@@ -34,7 +35,7 @@
#include "scheduler_arm.h"
#endif
-namespace art {
+namespace art HIDDEN {
// Return all combinations of ISA and code generator that are executable on
// hardware, or on simulator, and that we'd like to test.
@@ -65,7 +66,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class SchedulerTest : public OptimizingUnitTest {
+class SchedulerTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
public:
SchedulerTest() : graph_(CreateGraph()) { }
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 54053820ca..6a10440d11 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -16,10 +16,10 @@
#include "select_generator.h"
-#include "base/scoped_arena_containers.h"
+#include "optimizing/nodes.h"
#include "reference_type_propagation.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kMaxInstructionsInBranch = 1u;
@@ -69,156 +69,277 @@ static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
}
-// Returns nullptr if `block` has either no phis or there is more than one phi
-// with different inputs at `index1` and `index2`. Otherwise returns that phi.
-static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) {
+// Returns nullptr if `block` either has no phis or has more than one phi. Otherwise returns
+// that phi.
+static HPhi* GetSinglePhi(HBasicBlock* block, size_t index1, size_t index2) {
DCHECK_NE(index1, index2);
HPhi* select_phi = nullptr;
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- if (phi->InputAt(index1) != phi->InputAt(index2)) {
- if (select_phi == nullptr) {
- // First phi with different inputs for the two indices found.
- select_phi = phi;
- } else {
- // More than one phis has different inputs for the two indices.
- return nullptr;
- }
+ if (select_phi == nullptr) {
+ // First phi found.
+ select_phi = phi;
+ } else {
+ // More than one phi found, return null.
+ return nullptr;
}
}
return select_phi;
}
-bool HSelectGenerator::Run() {
- bool didSelect = false;
- // Select cache with local allocator.
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- ScopedArenaSafeMap<HInstruction*, HSelect*> cache(
- std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator));
+bool HSelectGenerator::TryGenerateSelectSimpleDiamondPattern(
+ HBasicBlock* block, ScopedArenaSafeMap<HInstruction*, HSelect*>* cache) {
+ DCHECK(block->GetLastInstruction()->IsIf());
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+ HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+ DCHECK_NE(true_block, false_block);
- // Iterate in post order in the unlikely case that removing one occurrence of
- // the selection pattern empties a branch block of another occurrence.
- for (HBasicBlock* block : graph_->GetPostOrder()) {
- if (!block->EndsWithIf()) continue;
+ if (!IsSimpleBlock(true_block) ||
+ !IsSimpleBlock(false_block) ||
+ !BlocksMergeTogether(true_block, false_block)) {
+ return false;
+ }
+ HBasicBlock* merge_block = true_block->GetSingleSuccessor();
- // Find elements of the diamond pattern.
- HIf* if_instruction = block->GetLastInstruction()->AsIf();
- HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
- HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
- DCHECK_NE(true_block, false_block);
+ // If the branches are not empty, move instructions in front of the If.
+ // TODO(dbrazdil): This puts an instruction between If and its condition.
+ // Implement moving of conditions to first users if possible.
+ while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
+ HInstruction* instr = true_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
+ }
+ while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
+ HInstruction* instr = false_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
+ }
+ DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
+ DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
- if (!IsSimpleBlock(true_block) ||
- !IsSimpleBlock(false_block) ||
- !BlocksMergeTogether(true_block, false_block)) {
- continue;
- }
- HBasicBlock* merge_block = true_block->GetSingleSuccessor();
-
- // If the branches are not empty, move instructions in front of the If.
- // TODO(dbrazdil): This puts an instruction between If and its condition.
- // Implement moving of conditions to first users if possible.
- while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
- HInstruction* instr = true_block->GetFirstInstruction();
- DCHECK(!instr->CanThrow());
- instr->MoveBefore(if_instruction);
- }
- while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
- HInstruction* instr = false_block->GetFirstInstruction();
- DCHECK(!instr->CanThrow());
- instr->MoveBefore(if_instruction);
- }
- DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
- DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
-
- // Find the resulting true/false values.
- size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
- size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
- DCHECK_NE(predecessor_index_true, predecessor_index_false);
-
- bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
- HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
-
- HInstruction* true_value = nullptr;
- HInstruction* false_value = nullptr;
- if (both_successors_return) {
- true_value = true_block->GetFirstInstruction()->InputAt(0);
- false_value = false_block->GetFirstInstruction()->InputAt(0);
- } else if (phi != nullptr) {
- true_value = phi->InputAt(predecessor_index_true);
- false_value = phi->InputAt(predecessor_index_false);
- } else {
- continue;
- }
- DCHECK(both_successors_return || phi != nullptr);
-
- // Create the Select instruction and insert it in front of the If.
- HInstruction* condition = if_instruction->InputAt(0);
- HSelect* select = new (graph_->GetAllocator()) HSelect(condition,
- true_value,
- false_value,
- if_instruction->GetDexPc());
- if (both_successors_return) {
- if (true_value->GetType() == DataType::Type::kReference) {
- DCHECK(false_value->GetType() == DataType::Type::kReference);
- ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache());
- }
- } else if (phi->GetType() == DataType::Type::kReference) {
- select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
- }
- block->InsertInstructionBefore(select, if_instruction);
+ // Find the resulting true/false values.
+ size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
+ size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
+ DCHECK_NE(predecessor_index_true, predecessor_index_false);
- // Remove the true branch which removes the corresponding Phi
- // input if needed. If left only with the false branch, the Phi is
- // automatically removed.
- if (both_successors_return) {
- false_block->GetFirstInstruction()->ReplaceInput(select, 0);
- } else {
- phi->ReplaceInput(select, predecessor_index_false);
+ bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
+ // TODO(solanes): Extend to support multiple phis? e.g.
+ // int a, b;
+ // if (bool) {
+ // a = 0; b = 1;
+ // } else {
+ // a = 1; b = 2;
+ // }
+ // // use a and b
+ HPhi* phi = GetSinglePhi(merge_block, predecessor_index_true, predecessor_index_false);
+
+ HInstruction* true_value = nullptr;
+ HInstruction* false_value = nullptr;
+ if (both_successors_return) {
+ true_value = true_block->GetFirstInstruction()->InputAt(0);
+ false_value = false_block->GetFirstInstruction()->InputAt(0);
+ } else if (phi != nullptr) {
+ true_value = phi->InputAt(predecessor_index_true);
+ false_value = phi->InputAt(predecessor_index_false);
+ } else {
+ return false;
+ }
+ DCHECK(both_successors_return || phi != nullptr);
+
+ // Create the Select instruction and insert it in front of the If.
+ HInstruction* condition = if_instruction->InputAt(0);
+ HSelect* select = new (graph_->GetAllocator()) HSelect(condition,
+ true_value,
+ false_value,
+ if_instruction->GetDexPc());
+ if (both_successors_return) {
+ if (true_value->GetType() == DataType::Type::kReference) {
+ DCHECK(false_value->GetType() == DataType::Type::kReference);
+ ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache());
}
+ } else if (phi->GetType() == DataType::Type::kReference) {
+ select->SetReferenceTypeInfoIfValid(phi->GetReferenceTypeInfo());
+ }
+ block->InsertInstructionBefore(select, if_instruction);
- bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
- true_block->DisconnectAndDelete();
+ // Remove the true branch which removes the corresponding Phi
+ // input if needed. If left only with the false branch, the Phi is
+ // automatically removed.
+ if (both_successors_return) {
+ false_block->GetFirstInstruction()->ReplaceInput(select, 0);
+ } else {
+ phi->ReplaceInput(select, predecessor_index_false);
+ }
+
+ bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
+ true_block->DisconnectAndDelete();
+
+ // Merge remaining blocks which are now connected with Goto.
+ DCHECK_EQ(block->GetSingleSuccessor(), false_block);
+ block->MergeWith(false_block);
+ if (!both_successors_return && only_two_predecessors) {
+ DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
+ DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
+ block->MergeWith(merge_block);
+ }
- // Merge remaining blocks which are now connected with Goto.
- DCHECK_EQ(block->GetSingleSuccessor(), false_block);
- block->MergeWith(false_block);
- if (!both_successors_return && only_two_predecessors) {
- DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
- DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
- block->MergeWith(merge_block);
+ MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
+
+ // Very simple way of finding common subexpressions in the generated HSelect statements
+ // (since this runs after GVN). Lookup by condition, and reuse latest one if possible
+ // (due to post order, latest select is most likely replacement). If needed, we could
+ // improve this by e.g. using the operands in the map as well.
+ auto it = cache->find(condition);
+ if (it == cache->end()) {
+ cache->Put(condition, select);
+ } else {
+ // Found cached value. See if latest can replace cached in the HIR.
+ HSelect* cached_select = it->second;
+ DCHECK_EQ(cached_select->GetCondition(), select->GetCondition());
+ if (cached_select->GetTrueValue() == select->GetTrueValue() &&
+ cached_select->GetFalseValue() == select->GetFalseValue() &&
+ select->StrictlyDominates(cached_select)) {
+ cached_select->ReplaceWith(select);
+ cached_select->GetBlock()->RemoveInstruction(cached_select);
}
+ it->second = select; // always cache latest
+ }
+
+ // No need to update dominance information, as we are simplifying
+ // a simple diamond shape, where the join block is merged with the
+ // entry block. Any following blocks would have had the join block
+ // as a dominator, and `MergeWith` handles changing that to the
+  // entry block.
+ return true;
+}
- MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
+HBasicBlock* HSelectGenerator::TryFixupDoubleDiamondPattern(HBasicBlock* block) {
+ DCHECK(block->GetLastInstruction()->IsIf());
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+ HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+ DCHECK_NE(true_block, false_block);
- // Very simple way of finding common subexpressions in the generated HSelect statements
- // (since this runs after GVN). Lookup by condition, and reuse latest one if possible
- // (due to post order, latest select is most likely replacement). If needed, we could
- // improve this by e.g. using the operands in the map as well.
- auto it = cache.find(condition);
- if (it == cache.end()) {
- cache.Put(condition, select);
+  // One branch must be a single goto, and the other must be the inner if.
+ if (true_block->IsSingleGoto() == false_block->IsSingleGoto()) {
+ return nullptr;
+ }
+
+ HBasicBlock* single_goto = true_block->IsSingleGoto() ? true_block : false_block;
+ HBasicBlock* inner_if_block = true_block->IsSingleGoto() ? false_block : true_block;
+
+  // The inner if branch has to be a block with just a comparison and an if.
+ if (!inner_if_block->EndsWithIf() ||
+ inner_if_block->GetLastInstruction()->AsIf()->InputAt(0) !=
+ inner_if_block->GetFirstInstruction() ||
+ inner_if_block->GetLastInstruction()->GetPrevious() !=
+ inner_if_block->GetFirstInstruction() ||
+ !inner_if_block->GetFirstInstruction()->IsCondition()) {
+ return nullptr;
+ }
+
+ HIf* inner_if_instruction = inner_if_block->GetLastInstruction()->AsIf();
+ HBasicBlock* inner_if_true_block = inner_if_instruction->IfTrueSuccessor();
+ HBasicBlock* inner_if_false_block = inner_if_instruction->IfFalseSuccessor();
+ if (!inner_if_true_block->IsSingleGoto() || !inner_if_false_block->IsSingleGoto()) {
+ return nullptr;
+ }
+
+ // One must merge into the outer condition and the other must not.
+ if (BlocksMergeTogether(single_goto, inner_if_true_block) ==
+ BlocksMergeTogether(single_goto, inner_if_false_block)) {
+ return nullptr;
+ }
+
+  // The first merge block joins the outer if with one of the inner if branches. It must contain
+  // only a Phi and a Goto.
+ HBasicBlock* first_merge = single_goto->GetSingleSuccessor();
+ if (first_merge->GetNumberOfPredecessors() != 2 ||
+ first_merge->GetPhis().CountSize() != 1 ||
+ !first_merge->GetLastInstruction()->IsGoto() ||
+ first_merge->GetFirstInstruction() != first_merge->GetLastInstruction()) {
+ return nullptr;
+ }
+
+ HPhi* first_phi = first_merge->GetFirstPhi()->AsPhi();
+
+  // The second merge block is where first_merge and the remaining inner branch join. It must
+  // contain only a phi plus a goto, or a phi plus a return. Which inner branch that is depends on
+  // the first merge.
+ HBasicBlock* merges_into_second_merge =
+ BlocksMergeTogether(single_goto, inner_if_true_block)
+ ? inner_if_false_block
+ : inner_if_true_block;
+ if (!BlocksMergeTogether(first_merge, merges_into_second_merge)) {
+ return nullptr;
+ }
+
+ HBasicBlock* second_merge = merges_into_second_merge->GetSingleSuccessor();
+ if (second_merge->GetNumberOfPredecessors() != 2 ||
+ second_merge->GetPhis().CountSize() != 1 ||
+ !(second_merge->GetLastInstruction()->IsGoto() ||
+ second_merge->GetLastInstruction()->IsReturn()) ||
+ second_merge->GetFirstInstruction() != second_merge->GetLastInstruction()) {
+ return nullptr;
+ }
+
+ size_t index = second_merge->GetPredecessorIndexOf(merges_into_second_merge);
+ HPhi* second_phi = second_merge->GetFirstPhi()->AsPhi();
+
+ // Merge the phis.
+ first_phi->AddInput(second_phi->InputAt(index));
+ merges_into_second_merge->ReplaceSuccessor(second_merge, first_merge);
+ second_phi->ReplaceWith(first_phi);
+ second_merge->RemovePhi(second_phi);
+
+  // Sort out the new dominance information before merging the blocks.
+ DCHECK_EQ(second_merge->GetSinglePredecessor(), first_merge);
+ second_merge->GetDominator()->RemoveDominatedBlock(second_merge);
+ second_merge->SetDominator(first_merge);
+ first_merge->AddDominatedBlock(second_merge);
+ first_merge->MergeWith(second_merge);
+
+  // No need to update dominance information. There's a chance that `merges_into_second_merge`
+  // doesn't come before `first_merge`, but we don't need to fix that up since
+  // `merges_into_second_merge` will disappear from the graph altogether when doing the follow-up
+  // TryGenerateSelectSimpleDiamondPattern.
+
+ return inner_if_block;
+}
+
+bool HSelectGenerator::Run() {
+ bool did_select = false;
+ // Select cache with local allocator.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+ ScopedArenaSafeMap<HInstruction*, HSelect*> cache(std::less<HInstruction*>(),
+ allocator.Adapter(kArenaAllocSelectGenerator));
+
+ // Iterate in post order in the unlikely case that removing one occurrence of
+ // the selection pattern empties a branch block of another occurrence.
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!block->EndsWithIf()) {
+ continue;
+ }
+
+ if (TryGenerateSelectSimpleDiamondPattern(block, &cache)) {
+ did_select = true;
} else {
- // Found cached value. See if latest can replace cached in the HIR.
- HSelect* cached = it->second;
- DCHECK_EQ(cached->GetCondition(), select->GetCondition());
- if (cached->GetTrueValue() == select->GetTrueValue() &&
- cached->GetFalseValue() == select->GetFalseValue() &&
- select->StrictlyDominates(cached)) {
- cached->ReplaceWith(select);
- cached->GetBlock()->RemoveInstruction(cached);
+      // Try to fix up the odd version of the double diamond pattern. If that succeeds, we can
+      // generate two selects.
+ HBasicBlock* inner_if_block = TryFixupDoubleDiamondPattern(block);
+ if (inner_if_block != nullptr) {
+ // Generate the selects now since `inner_if_block` should be after `block` in PostOrder.
+ bool result = TryGenerateSelectSimpleDiamondPattern(inner_if_block, &cache);
+ DCHECK(result);
+ result = TryGenerateSelectSimpleDiamondPattern(block, &cache);
+ DCHECK(result);
+ did_select = true;
}
- it->second = select; // always cache latest
}
-
- // No need to update dominance information, as we are simplifying
- // a simple diamond shape, where the join block is merged with the
- // entry block. Any following blocks would have had the join block
- // as a dominator, and `MergeWith` handles changing that to the
- // entry block.
- didSelect = true;
}
- return didSelect;
+
+ return did_select;
}
} // namespace art
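
For orientation, the simple diamond that TryGenerateSelectSimpleDiamondPattern targets is exactly the CFG a ternary produces, and the condition-keyed cache gives a cheap post-GVN CSE when two diamonds share a condition. The functions below are only a source-level illustration of those two shapes, not compiler code:

    // One ternary = one If-diamond; the pass replaces the merge Phi with an HSelect
    // inserted in front of the If.
    int SimpleDiamond(bool cond, int a, int b) {
      return cond ? a : b;
    }

    // Two diamonds guarded by the same condition: the cache is keyed by that condition, and
    // when the true/false values also match and one select strictly dominates the other,
    // the dominated select is replaced and removed.
    int SharedCondition(bool cond, int a, int b) {
      int x = cond ? a : b;
      int y = cond ? a : b;
      return x + y;
    }
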
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index 30ac8a86eb..7aa0803d89 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -57,9 +57,12 @@
#ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
+#include "base/macros.h"
+#include "base/scoped_arena_containers.h"
#include "optimization.h"
+#include "optimizing/nodes.h"
-namespace art {
+namespace art HIDDEN {
class HSelectGenerator : public HOptimization {
public:
@@ -72,6 +75,43 @@ class HSelectGenerator : public HOptimization {
static constexpr const char* kSelectGeneratorPassName = "select_generator";
private:
+ bool TryGenerateSelectSimpleDiamondPattern(HBasicBlock* block,
+ ScopedArenaSafeMap<HInstruction*, HSelect*>* cache);
+
+ // When generating code for nested ternary operators (e.g. `return (x > 100) ? 100 : ((x < -100) ?
+  // -100 : x);`), a dexer can generate a double diamond pattern, but it is not a clear-cut one due
+ // to the merging of the blocks. `TryFixupDoubleDiamondPattern` recognizes that pattern and fixes
+ // up the graph to have a clean double diamond that `TryGenerateSelectSimpleDiamondPattern` can
+ // use to generate selects.
+ //
+ // In ASCII, it turns:
+ //
+ // 1 (outer if)
+ // / \
+ // 2 3 (inner if)
+ // | / \
+ // | 4 5
+ // \/ |
+ // 6 |
+ // \ |
+ // 7
+ // |
+ // 8
+ // into:
+ // 1 (outer if)
+ // / \
+ // 2 3 (inner if)
+ // | / \
+ // | 4 5
+ // \/ /
+ // 6
+ // |
+ // 8
+ //
+  // In short, we merge blocks 6 and 7 so that block 7 disappears. Now we have a diamond with
+  // {3,4,5,6}, and once that gets resolved we get another one with the outer if.
+ HBasicBlock* TryFixupDoubleDiamondPattern(HBasicBlock* block);
+
DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
};
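
Writing out the nested-ternary example from the comment above as straight C++ shows what the fixed-up double diamond ultimately lowers to: one select for the inner diamond feeding a second select for the outer one. This is only a source-level restatement of that example, not ART code:

    // return (x > 100) ? 100 : ((x < -100) ? -100 : x);
    int Clamp100(int x) {
      int inner = (x < -100) ? -100 : x;  // inner diamond -> first HSelect
      return (x > 100) ? 100 : inner;     // outer diamond -> second HSelect
    }
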
diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc
index b18d41abbb..fc9e150d92 100644
--- a/compiler/optimizing/select_generator_test.cc
+++ b/compiler/optimizing/select_generator_test.cc
@@ -17,12 +17,13 @@
#include "select_generator.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
class SelectGeneratorTest : public OptimizingUnitTest {
protected:
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 17cf3d3477..277edff33e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -34,7 +34,7 @@
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
-namespace art {
+namespace art HIDDEN {
static bool IsInBootImage(ArtMethod* method) {
gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -63,9 +63,9 @@ HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenLoadMethod(
bool for_interface_call,
CodeGenerator* codegen) {
if (kIsDebugBuild) {
- ScopedObjectAccess soa(Thread::Current()); // Required for GetDeclaringClass below.
+ ScopedObjectAccess soa(Thread::Current()); // Required for `IsStringConstructor()` below.
DCHECK(callee != nullptr);
- DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass()));
+ DCHECK(!callee->IsStringConstructor());
}
MethodLoadKind method_load_kind;
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 975366918c..6dfe904f27 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_
#define ART_COMPILER_OPTIMIZING_SHARPENING_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index ba97b43de9..56719b100e 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -16,7 +16,7 @@
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
bool SideEffectsAnalysis::Run() {
// Inlining might have created more blocks, so we need to increase the size
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index 56a01e63f1..47fcdc5d1b 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -18,10 +18,11 @@
#define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis : public HOptimization {
public:
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index 268798ca7d..f2b781dfa4 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -16,10 +16,11 @@
#include <gtest/gtest.h>
+#include "base/macros.h"
#include "data_type.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Only runtime types other than void are allowed.
static const DataType::Type kTestTypes[] = {
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 67ee83c9dd..a658252e69 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -27,7 +27,7 @@
#include "scoped_thread_state_change-inl.h"
#include "ssa_phi_elimination.h"
-namespace art {
+namespace art HIDDEN {
void SsaBuilder::FixNullConstantType() {
// The order doesn't matter here.
@@ -538,7 +538,6 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// Compute type of reference type instructions. The pass assumes that
// NullConstant has been fixed up.
ReferenceTypePropagation(graph_,
- class_loader_,
dex_cache_,
/* is_first_run= */ true).Run();
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index a7d4e0ebd3..99a5469932 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -17,12 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Transforms a graph into SSA form. The liveness guarantees of
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 18942a1823..317e0999d7 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -21,7 +21,7 @@
#include "linear_order.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void SsaLivenessAnalysis::Analyze() {
// Compute the linear order directly in the graph's data structure
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 7f31585f34..cc2b49cf22 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -21,11 +21,12 @@
#include "base/intrusive_forward_list.h"
#include "base/iteration_range.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class SsaLivenessAnalysis;
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index a477893d57..2df0f34c7d 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -20,12 +20,13 @@
#include "arch/instruction_set_features.h"
#include "base/arena_allocator.h"
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
class SsaLivenessAnalysisTest : public OptimizingUnitTest {
protected:
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 8fd6962500..ce343dffec 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -21,7 +21,7 @@
#include "base/scoped_arena_containers.h"
#include "base/bit_vector-inl.h"
-namespace art {
+namespace art HIDDEN {
bool SsaDeadPhiElimination::Run() {
MarkDeadPhis();
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index c5cc752ffc..f606f928fa 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization phase that removes dead phis from the graph. Dead phis are unused
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index e679893af2..980493db34 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -17,6 +17,7 @@
#include "android-base/stringprintf.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
@@ -27,9 +28,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class SsaTest : public OptimizingUnitTest {
+class SsaTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index f55bbee1c8..1a368ed347 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -20,6 +20,7 @@
#include <vector>
#include "art_method-inl.h"
+#include "base/globals.h"
#include "base/stl_util.h"
#include "class_linker.h"
#include "dex/dex_file.h"
@@ -32,7 +33,7 @@
#include "scoped_thread_state_change-inl.h"
#include "stack_map.h"
-namespace art {
+namespace art HIDDEN {
constexpr static bool kVerifyStackMaps = kIsDebugBuild;
@@ -49,7 +50,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
size_t core_spill_mask,
size_t fp_spill_mask,
uint32_t num_dex_registers,
- bool baseline) {
+ bool baseline,
+ bool debuggable) {
DCHECK(!in_method_) << "Mismatched Begin/End calls";
in_method_ = true;
DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -60,6 +62,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
fp_spill_mask_ = fp_spill_mask;
num_dex_registers_ = num_dex_registers;
baseline_ = baseline;
+ debuggable_ = debuggable;
if (kVerifyStackMaps) {
dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -99,16 +102,21 @@ void StackMapStream::EndMethod(size_t code_size) {
}
}
-void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
- uint32_t native_pc_offset,
- uint32_t register_mask,
- BitVector* stack_mask,
- StackMap::Kind kind,
- bool needs_vreg_info) {
+void StackMapStream::BeginStackMapEntry(
+ uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask,
+ BitVector* stack_mask,
+ StackMap::Kind kind,
+ bool needs_vreg_info,
+ const std::vector<uint32_t>& dex_pc_list_for_catch_verification) {
DCHECK(in_method_) << "Call BeginMethod first";
DCHECK(!in_stack_map_) << "Mismatched Begin/End calls";
in_stack_map_ = true;
+ DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kind == StackMap::Kind::Catch);
+ DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kIsDebugBuild);
+
current_stack_map_ = BitTableBuilder<StackMap>::Entry();
current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind);
current_stack_map_[StackMap::kPackedNativePc] =
@@ -149,7 +157,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
instruction_set_);
CHECK_EQ(stack_map.Row(), stack_map_index);
} else if (kind == StackMap::Kind::Catch) {
- StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc);
+ StackMap stack_map = code_info.GetCatchStackMapForDexPc(
+ ArrayRef<const uint32_t>(dex_pc_list_for_catch_verification));
CHECK_EQ(stack_map.Row(), stack_map_index);
}
StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
@@ -367,6 +376,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
+ flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0;
DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte.
uint32_t bit_table_flags = 0;
ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 27145a174c..643af2da94 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -21,6 +21,7 @@
#include "base/arena_bit_vector.h"
#include "base/bit_table.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "base/memory_region.h"
#include "base/scoped_arena_containers.h"
#include "base/value_object.h"
@@ -28,7 +29,7 @@
#include "nodes.h"
#include "stack_map.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
@@ -64,15 +65,19 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
size_t core_spill_mask,
size_t fp_spill_mask,
uint32_t num_dex_registers,
- bool baseline = false);
+ bool baseline,
+ bool debuggable);
void EndMethod(size_t code_size);
- void BeginStackMapEntry(uint32_t dex_pc,
- uint32_t native_pc_offset,
- uint32_t register_mask = 0,
- BitVector* sp_mask = nullptr,
- StackMap::Kind kind = StackMap::Kind::Default,
- bool needs_vreg_info = true);
+ void BeginStackMapEntry(
+ uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask = 0,
+ BitVector* sp_mask = nullptr,
+ StackMap::Kind kind = StackMap::Kind::Default,
+ bool needs_vreg_info = true,
+ const std::vector<uint32_t>& dex_pc_list_for_catch_verification = std::vector<uint32_t>());
+
void EndStackMapEntry();
void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
@@ -125,6 +130,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
uint32_t fp_spill_mask_ = 0;
uint32_t num_dex_registers_ = 0;
bool baseline_;
+ bool debuggable_;
BitTableBuilder<StackMap> stack_maps_;
BitTableBuilder<RegisterMask> register_masks_;
BitmapTableBuilder stack_masks_;
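A minimal caller sketch for the updated StackMapStream interface above; it is not part of the patch. The arena setup mirrors the pattern used in stack_map_test.cc further down, and the numeric values, the catch dex-pc list, and the choice of ISA are placeholders.

    // Sketch only: exercises the now-mandatory baseline/debuggable flags and the optional
    // dex-pc list used for catch-entry verification (debug builds only). Values are placeholders.
    MallocArenaPool pool;
    ArenaStack arena_stack(&pool);
    ScopedArenaAllocator allocator(&arena_stack);
    StackMapStream stream(&allocator, kRuntimeISA);
    stream.BeginMethod(/* frame_size_in_bytes= */ 32,
                       /* core_spill_mask= */ 0,
                       /* fp_spill_mask= */ 0,
                       /* num_dex_registers= */ 2,
                       /* baseline= */ false,
                       /* debuggable= */ true);
    std::vector<uint32_t> catch_dex_pcs = {3u, 8u};
    stream.BeginStackMapEntry(/* dex_pc= */ 3u,
                              /* native_pc_offset= */ 64u,
                              /* register_mask= */ 0u,
                              /* sp_mask= */ nullptr,
                              StackMap::Kind::Catch,
                              /* needs_vreg_info= */ false,  // vreg info elided for brevity
                              catch_dex_pcs);
    stream.EndStackMapEntry();
    stream.EndMethod(/* code_size= */ 256u);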
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index f6a739e15a..a2c30e7681 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -18,12 +18,13 @@
#include "art_method.h"
#include "base/arena_bit_vector.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "stack_map_stream.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
// Check that the stack mask of given stack map is identical
// to the given bit vector. Returns true if they are same.
@@ -52,7 +53,12 @@ TEST(StackMapTest, Test1) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
size_t number_of_dex_registers = 2;
@@ -106,7 +112,12 @@ TEST(StackMapTest, Test2) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -300,7 +311,12 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -363,7 +379,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 2;
@@ -411,7 +432,12 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 2;
@@ -467,7 +493,12 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 1);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 1,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask);
@@ -512,7 +543,12 @@ TEST(StackMapTest, InlineTest) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -702,7 +738,12 @@ TEST(StackMapTest, TestDeduplicateStackMask) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 0);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 0,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, true);
sp_mask.SetBit(1);
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index a5f919c31c..7c0097c6f6 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -22,7 +22,7 @@
#include <sstream>
-namespace art {
+namespace art HIDDEN {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
@@ -633,7 +633,7 @@ void SuperblockCloner::ConstructSubgraphClosedSSA() {
HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType());
if (value->GetType() == DataType::Type::kReference) {
- phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo());
+ phi->SetReferenceTypeInfoIfValid(value->GetReferenceTypeInfo());
}
exit_block->AddPhi(phi);
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 1f6ee74fbd..421701fb19 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -20,9 +20,10 @@
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class InductionVarRange;
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index d8d68b7763..ea2563ea7d 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -21,7 +22,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index 33823e2a11..76e7e0c32c 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_instruction.h"
#include "nodes.h"
@@ -22,13 +23,13 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
/**
* Check that the HGraphBuilder adds suspend checks to backward branches.
*/
-class SuspendCheckTest : public OptimizingUnitTest {
+class SuspendCheckTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data);
};
diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc
new file mode 100644
index 0000000000..eb70b670fe
--- /dev/null
+++ b/compiler/optimizing/write_barrier_elimination.cc
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "write_barrier_elimination.h"
+
+#include "base/arena_allocator.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
+#include "optimizing/nodes.h"
+
+namespace art HIDDEN {
+
+class WBEVisitor final : public HGraphVisitor {
+ public:
+ WBEVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph),
+ scoped_allocator_(graph->GetArenaStack()),
+ current_write_barriers_(scoped_allocator_.Adapter(kArenaAllocWBE)),
+ stats_(stats) {}
+
+ void VisitBasicBlock(HBasicBlock* block) override {
+ // We clear the map to perform this optimization only in the same block. Doing it across blocks
+ // would entail non-trivial merging of states.
+ current_write_barriers_.clear();
+ HGraphVisitor::VisitBasicBlock(block);
+ }
+
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
+ DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()));
+
+ if (instruction->GetFieldType() != DataType::Type::kReference ||
+ instruction->GetValue()->IsNullConstant()) {
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ return;
+ }
+
+ MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier);
+ HInstruction* obj = HuntForOriginalReference(instruction->InputAt(0));
+ auto it = current_write_barriers_.find(obj);
+ if (it != current_write_barriers_.end()) {
+ DCHECK(it->second->IsInstanceFieldSet());
+ DCHECK(it->second->AsInstanceFieldSet()->GetWriteBarrierKind() !=
+ WriteBarrierKind::kDontEmit);
+ DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock());
+ it->second->AsInstanceFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck);
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier);
+ } else {
+ const bool inserted = current_write_barriers_.insert({obj, instruction}).second;
+ DCHECK(inserted);
+ DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ }
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
+ DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()));
+
+ if (instruction->GetFieldType() != DataType::Type::kReference ||
+ instruction->GetValue()->IsNullConstant()) {
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ return;
+ }
+
+ MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier);
+ HInstruction* cls = HuntForOriginalReference(instruction->InputAt(0));
+ auto it = current_write_barriers_.find(cls);
+ if (it != current_write_barriers_.end()) {
+ DCHECK(it->second->IsStaticFieldSet());
+ DCHECK(it->second->AsStaticFieldSet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock());
+ it->second->AsStaticFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck);
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier);
+ } else {
+ const bool inserted = current_write_barriers_.insert({cls, instruction}).second;
+ DCHECK(inserted);
+ DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ }
+ }
+
+ void VisitArraySet(HArraySet* instruction) override {
+ if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) {
+ ClearCurrentValues();
+ }
+
+ if (instruction->GetComponentType() != DataType::Type::kReference ||
+ instruction->GetValue()->IsNullConstant()) {
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ return;
+ }
+
+ HInstruction* arr = HuntForOriginalReference(instruction->InputAt(0));
+ MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier);
+ auto it = current_write_barriers_.find(arr);
+ if (it != current_write_barriers_.end()) {
+ DCHECK(it->second->IsArraySet());
+ DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock());
+ // We never skip the null check for ArraySets, so the barrier kind is already kEmitNoNullCheck.
+ DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() == WriteBarrierKind::kEmitNoNullCheck);
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier);
+ } else {
+ const bool inserted = current_write_barriers_.insert({arr, instruction}).second;
+ DCHECK(inserted);
+ DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ }
+ }
+
+ void VisitInstruction(HInstruction* instruction) override {
+ if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) {
+ ClearCurrentValues();
+ }
+ }
+
+ private:
+ void ClearCurrentValues() { current_write_barriers_.clear(); }
+
+ HInstruction* HuntForOriginalReference(HInstruction* ref) const {
+ // An original reference can be transformed by instructions like:
+ // i0 NewArray
+ // i1 HInstruction(i0) <-- NullCheck, BoundType, IntermediateAddress.
+ // i2 ArraySet(i1, index, value)
+ DCHECK(ref != nullptr);
+ while (ref->IsNullCheck() || ref->IsBoundType() || ref->IsIntermediateAddress()) {
+ ref = ref->InputAt(0);
+ }
+ return ref;
+ }
+
+ ScopedArenaAllocator scoped_allocator_;
+
+ // Stores a map of <Receiver, InstructionWhereTheWriteBarrierIs>.
+ // `InstructionWhereTheWriteBarrierIs` is used for DCHECKs only.
+ ScopedArenaHashMap<HInstruction*, HInstruction*> current_write_barriers_;
+
+ OptimizingCompilerStats* const stats_;
+
+ DISALLOW_COPY_AND_ASSIGN(WBEVisitor);
+};
+
+bool WriteBarrierElimination::Run() {
+ WBEVisitor wbe_visitor(graph_, stats_);
+ wbe_visitor.VisitReversePostOrder();
+ return true;
+}
+
+} // namespace art
diff --git a/compiler/optimizing/write_barrier_elimination.h b/compiler/optimizing/write_barrier_elimination.h
new file mode 100644
index 0000000000..a3769e7421
--- /dev/null
+++ b/compiler/optimizing/write_barrier_elimination.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+
+#include "base/macros.h"
+#include "optimization.h"
+
+namespace art HIDDEN {
+
+// Eliminates unnecessary write barriers from InstanceFieldSet, StaticFieldSet, and ArraySet.
+//
+// We can eliminate redundant write barriers as we don't need several for the same receiver. For
+// example:
+// MyObject o;
+// o.inner_obj = io;
+// o.inner_obj2 = io2;
+// o.inner_obj3 = io3;
+// We can keep the write barrier for `inner_obj` and remove the other two.
+//
+// In order to do this, we set the WriteBarrierKind of the instruction. The instruction's kind is
+// set to kEmitNoNullCheck (if this write barrier coalesced other write barriers, we don't want to
+// perform the null check optimization), or to kDontEmit (if the write barrier as a whole is not
+// needed).
+class WriteBarrierElimination : public HOptimization {
+ public:
+ WriteBarrierElimination(HGraph* graph,
+ OptimizingCompilerStats* stats,
+ const char* name = kWBEPassName)
+ : HOptimization(graph, name, stats) {}
+
+ bool Run() override;
+
+ static constexpr const char* kWBEPassName = "write_barrier_elimination";
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(WriteBarrierElimination);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
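A minimal usage sketch for the new pass, not part of the patch: WriteBarrierElimination is an ordinary HOptimization, so a caller constructs it with the graph and stats and invokes Run(); where it is actually scheduled in the optimizing pipeline is an assumption here.

    // Sketch only: `graph` and `stats` stand for the HGraph under compilation and its
    // OptimizingCompilerStats, as with the other HOptimization passes.
    WriteBarrierElimination wbe(graph, stats);
    wbe.Run();  // Marks coalesced-away barriers kDontEmit and upgrades their keeper to kEmitNoNullCheck.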
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index b1abcf6747..e266618980 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -18,13 +18,13 @@
#include "code_generator.h"
#include "driver/compiler_options.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
/**
* Replace instructions with memory operand forms.
*/
-class MemoryOperandVisitor : public HGraphVisitor {
+class MemoryOperandVisitor final : public HGraphVisitor {
public:
MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
: HGraphVisitor(graph),
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
index 3f4178d58a..1cae1a5d3a 100644
--- a/compiler/optimizing/x86_memory_gen.h
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
namespace x86 {
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 0aaeaa5b4f..a122d3c9d3 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -38,7 +38,7 @@
#define __ assembler.
-namespace art {
+namespace art HIDDEN {
#ifdef ART_ENABLE_CODEGEN_arm
namespace arm {
@@ -208,6 +208,8 @@ std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet is
return x86::CreateTrampoline(&allocator, offset);
#endif
default:
+ UNUSED(abi);
+ UNUSED(offset);
LOG(FATAL) << "Unexpected InstructionSet: " << isa;
UNREACHABLE();
}
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index f0086b58d5..32e35ae1d6 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -22,9 +22,10 @@
#include <vector>
#include "arch/instruction_set.h"
+#include "base/macros.h"
#include "offsets.h"
-namespace art {
+namespace art HIDDEN {
enum EntryPointCallingConvention {
// ABI of invocations to a method's interpreter entry point.
@@ -36,12 +37,10 @@ enum EntryPointCallingConvention {
};
// Create code that will invoke the function held in thread local storage.
-std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
- EntryPointCallingConvention abi,
- ThreadOffset32 entry_point_offset);
-std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
- EntryPointCallingConvention abi,
- ThreadOffset64 entry_point_offset);
+EXPORT std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(
+ InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset32 entry_point_offset);
+EXPORT std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(
+ InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset64 entry_point_offset);
} // namespace art
diff --git a/compiler/utils/arm/assembler_arm_shared.h b/compiler/utils/arm/assembler_arm_shared.h
deleted file mode 100644
index 7464052d93..0000000000
--- a/compiler/utils/arm/assembler_arm_shared.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_
-#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_
-
-namespace art {
-namespace arm {
-
-enum LoadOperandType {
- kLoadSignedByte,
- kLoadUnsignedByte,
- kLoadSignedHalfword,
- kLoadUnsignedHalfword,
- kLoadWord,
- kLoadWordPair,
- kLoadSWord,
- kLoadDWord
-};
-
-enum StoreOperandType {
- kStoreByte,
- kStoreHalfword,
- kStoreWord,
- kStoreWordPair,
- kStoreSWord,
- kStoreDWord
-};
-
-} // namespace arm
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 77f5d7081a..c7ca003530 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -26,7 +26,7 @@
using namespace vixl::aarch32; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm {
#ifdef ___
@@ -81,9 +81,7 @@ void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
}
void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int code) {
- // The Marking Register is only used in the Baker read barrier configuration.
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(kReserveMarkingRegister);
vixl32::Label mr_is_ok;
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 5bc8a70280..741119d7f7 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -19,15 +19,12 @@
#include <android-base/logging.h>
-#include "base/arena_containers.h"
#include "base/macros.h"
#include "constants_arm.h"
#include "dwarf/register.h"
#include "offsets.h"
-#include "utils/arm/assembler_arm_shared.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
-#include "utils/jni_macro_assembler.h"
// TODO(VIXL): Make VIXL compile with -Wshadow and remove pragmas.
#pragma GCC diagnostic push
@@ -37,7 +34,7 @@
namespace vixl32 = vixl::aarch32;
-namespace art {
+namespace art HIDDEN {
namespace arm {
inline dwarf::Reg DWARFReg(vixl32::Register reg) {
@@ -48,6 +45,26 @@ inline dwarf::Reg DWARFReg(vixl32::SRegister reg) {
return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode()));
}
+enum LoadOperandType {
+ kLoadSignedByte,
+ kLoadUnsignedByte,
+ kLoadSignedHalfword,
+ kLoadUnsignedHalfword,
+ kLoadWord,
+ kLoadWordPair,
+ kLoadSWord,
+ kLoadDWord
+};
+
+enum StoreOperandType {
+ kStoreByte,
+ kStoreHalfword,
+ kStoreWord,
+ kStoreWordPair,
+ kStoreSWord,
+ kStoreDWord
+};
+
class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler {
public:
// Most methods fit in a 1KB code buffer, which results in more optimal alloc/realloc and
diff --git a/compiler/utils/arm/constants_arm.cc b/compiler/utils/arm/constants_arm.cc
index b02b343b26..a927fc201a 100644
--- a/compiler/utils/arm/constants_arm.cc
+++ b/compiler/utils/arm/constants_arm.cc
@@ -16,7 +16,7 @@
#include "constants_arm.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
std::ostream& operator<<(std::ostream& os, const DRegister& rhs) {
diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h
index f42fd9777b..ef6d48dd3b 100644
--- a/compiler/utils/arm/constants_arm.h
+++ b/compiler/utils/arm/constants_arm.h
@@ -26,8 +26,9 @@
#include "arch/arm/registers_arm.h"
#include "base/casts.h"
#include "base/globals.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
// Defines constants and accessor classes to assemble, disassemble and
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 6e6d40dc92..54873454eb 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -20,6 +20,7 @@
#include <type_traits>
#include "entrypoints/quick/quick_entrypoints.h"
+#include "indirect_reference_table.h"
#include "lock_word.h"
#include "thread.h"
@@ -27,9 +28,8 @@ using namespace vixl::aarch32; // NOLINT(build/namespaces)
namespace vixl32 = vixl::aarch32;
using vixl::ExactAssemblyScope;
-using vixl::CodeBufferCheckScope;
-namespace art {
+namespace art HIDDEN {
namespace arm {
#ifdef ___
@@ -155,7 +155,7 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
// Pop LR to PC unless we need to emit some read barrier code just before returning.
bool emit_code_before_return =
- (kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
+ (gUseReadBarrier && kUseBakerReadBarrier) &&
(may_suspend || (kIsDebugBuild && emit_run_time_checks_in_debug_mode_));
if ((core_spill_mask & (1u << lr.GetCode())) != 0u && !emit_code_before_return) {
DCHECK_EQ(core_spill_mask & (1u << pc.GetCode()), 0u);
@@ -215,7 +215,9 @@ void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
}
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Emit marking register refresh even with all GCs as we are still using the
+ // register due to nterp's dependency.
+ if (kReserveMarkingRegister) {
if (may_suspend) {
// The method may be suspended; refresh the Marking Register.
___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
@@ -305,13 +307,6 @@ void ArmVIXLJNIMacroAssembler::Store(ManagedRegister m_base,
}
}
-void ArmVIXLJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
- vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- temps.Exclude(src);
- asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
-}
-
void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
@@ -319,70 +314,6 @@ void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msr
asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
}
-void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest,
- ManagedRegister msrc,
- FrameOffset in_off) {
- vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
- asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- asm_.LoadFromOffset(kLoadWord, scratch, sp, in_off.Int32Value());
- asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
-}
-
-void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
- asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
-}
-
-void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- asm_.LoadFromOffset(kLoadWord, scratch, AsVIXLRegister(base.AsArm()), offs.Int32Value());
- if (unpoison_reference) {
- asm_.MaybeUnpoisonHeapReference(scratch);
- }
- asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
-}
-
-void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister mdest,
- ManagedRegister mbase,
- MemberOffset offs,
- bool unpoison_reference) {
- vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm());
- vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- temps.Exclude(dest, base);
- asm_.LoadFromOffset(kLoadWord, dest, base, offs.Int32Value());
-
- if (unpoison_reference) {
- asm_.MaybeUnpoisonHeapReference(dest);
- }
-}
-
-void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister dest ATTRIBUTE_UNUSED,
- FrameOffset src ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::LoadRawPtr(ManagedRegister dest ATTRIBUTE_UNUSED,
- ManagedRegister base ATTRIBUTE_UNUSED,
- Offset offs ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- asm_.LoadImmediate(scratch, imm);
- asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
-}
-
void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
return Load(m_dst.AsArm(), sp, src.Int32Value(), size);
}
@@ -394,11 +325,6 @@ void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst,
return Load(m_dst.AsArm(), AsVIXLRegister(m_base.AsArm()), offs.Int32Value(), size);
}
-void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
- ThreadOffset32 src,
- size_t size) {
- return Load(m_dst.AsArm(), tr, src.Int32Value(), size);
-}
void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm());
@@ -407,29 +333,15 @@ void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, Threa
asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value());
}
-void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- asm_.LoadFromOffset(kLoadWord, scratch, tr, thr_offs.Int32Value());
- asm_.StoreToOffset(kStoreWord, scratch, sp, fr_offs.Int32Value());
-}
-
-void ArmVIXLJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
- FrameOffset fr_offs ATTRIBUTE_UNUSED,
- ManagedRegister mscratch ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
- FrameOffset fr_offs) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- asm_.AddConstant(scratch, sp, fr_offs.Int32Value());
- asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value());
-}
-
-void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
- asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
+void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) {
+ if (tag_sp) {
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ vixl32::Register reg = temps.Acquire();
+ ___ Orr(reg, sp, 0x2);
+ asm_.StoreToOffset(kStoreWord, reg, tr, thr_offs.Int32Value());
+ } else {
+ asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
+ }
}
void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
@@ -869,6 +781,11 @@ void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst,
}
}
+void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, size_t value) {
+ ArmManagedRegister dst = mdst.AsArm();
+ ___ Mov(AsVIXLRegister(dst), static_cast<uint32_t>(value));
+}
+
void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
DCHECK(size == 4 || size == 8) << size;
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
@@ -884,48 +801,6 @@ void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t si
}
}
-void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
- ManagedRegister src_base ATTRIBUTE_UNUSED,
- Offset src_offset ATTRIBUTE_UNUSED,
- ManagedRegister mscratch ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest_base ATTRIBUTE_UNUSED,
- Offset dest_offset ATTRIBUTE_UNUSED,
- FrameOffset src ATTRIBUTE_UNUSED,
- ManagedRegister mscratch ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED,
- FrameOffset src_base ATTRIBUTE_UNUSED,
- Offset src_offset ATTRIBUTE_UNUSED,
- ManagedRegister mscratch ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest ATTRIBUTE_UNUSED,
- Offset dest_offset ATTRIBUTE_UNUSED,
- ManagedRegister src ATTRIBUTE_UNUSED,
- Offset src_offset ATTRIBUTE_UNUSED,
- ManagedRegister mscratch ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
-void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED,
- Offset dest_offset ATTRIBUTE_UNUSED,
- FrameOffset src ATTRIBUTE_UNUSED,
- Offset src_offset ATTRIBUTE_UNUSED,
- ManagedRegister scratch ATTRIBUTE_UNUSED,
- size_t size ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
FrameOffset spilled_reference_offset,
ManagedRegister min_reg,
@@ -971,33 +846,19 @@ void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
}
}
-void ArmVIXLJNIMacroAssembler::CreateJObject(FrameOffset out_off,
- FrameOffset spilled_reference_offset,
- bool null_allowed) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register scratch = temps.Acquire();
- if (null_allowed) {
- asm_.LoadFromOffset(kLoadWord, scratch, sp, spilled_reference_offset.Int32Value());
- // Null values get a jobject value null. Otherwise, the jobject is
- // the address of the spilled reference.
- // e.g. scratch = (scratch == 0) ? 0 : (SP+spilled_reference_offset)
- ___ Cmp(scratch, 0);
-
- // FIXME: Using 32-bit T32 instruction in IT-block is deprecated.
- if (asm_.ShifterOperandCanHold(ADD, spilled_reference_offset.Int32Value())) {
- ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
- 2 * vixl32::kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- ___ it(ne, 0x8);
- asm_.AddConstantInIt(scratch, sp, spilled_reference_offset.Int32Value(), ne);
- } else {
- // TODO: Implement this (old arm assembler would have crashed here).
- UNIMPLEMENTED(FATAL);
- }
- } else {
- asm_.AddConstant(scratch, sp, spilled_reference_offset.Int32Value());
- }
- asm_.StoreToOffset(kStoreWord, scratch, sp, out_off.Int32Value());
+void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister mreg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) {
+ constexpr uint32_t kGlobalOrWeakGlobalMask =
+ dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetGlobalOrWeakGlobalMask());
+ constexpr uint32_t kIndirectRefKindMask =
+ dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetIndirectRefKindMask());
+ vixl32::Register reg = AsVIXLRegister(mreg.AsArm());
+ ___ Tst(reg, kGlobalOrWeakGlobalMask);
+ ___ B(ne, ArmVIXLJNIMacroLabel::Cast(slow_path)->AsArm());
+ ___ Bics(reg, reg, kIndirectRefKindMask);
+ ___ B(eq, ArmVIXLJNIMacroLabel::Cast(resume)->AsArm()); // Skip load for null.
+ ___ Ldr(reg, MemOperand(reg));
}
void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
@@ -1165,7 +1026,7 @@ void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnary
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
vixl32::Register test_reg;
DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
- DCHECK(kUseReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// TestGcMarking() is used in the JNI stub entry when the marking register is up to date.
if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
@@ -1213,15 +1074,19 @@ void ArmVIXLJNIMacroAssembler::TestMarkBit(ManagedRegister mref,
}
}
+void ArmVIXLJNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ vixl32::Register scratch = temps.Acquire();
+ ___ Mov(scratch, static_cast<uint32_t>(address));
+ ___ Ldrb(scratch, MemOperand(scratch, 0));
+ ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+}
+
void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) {
CHECK(label != nullptr);
___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
}
-void ArmVIXLJNIMacroAssembler::MemoryBarrier(ManagedRegister scratch ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL);
-}
-
void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest,
vixl32::Register base,
int32_t offset,
@@ -1243,6 +1108,8 @@ void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest,
}
} else if (dest.IsRegisterPair()) {
CHECK_EQ(8u, size) << dest;
+ // TODO: Use LDRD to improve stubs for @CriticalNative methods with parameters
+ // (long, long, ...). A single 32-bit LDRD is presumably faster than two 16-bit LDRs.
___ Ldr(AsVIXLRegisterPairLow(dest), MemOperand(base, offset));
___ Ldr(AsVIXLRegisterPairHigh(dest), MemOperand(base, offset + 4));
} else if (dest.IsSRegister()) {
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index ed453ae8ff..f6df7f2c53 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -23,13 +23,12 @@
#include "base/macros.h"
#include "constants_arm.h"
#include "offsets.h"
-#include "utils/arm/assembler_arm_shared.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
#include "utils/jni_macro_assembler.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class ArmVIXLJNIMacroAssembler final
@@ -63,34 +62,14 @@ class ArmVIXLJNIMacroAssembler final
// Store routines.
void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
- void StoreRef(FrameOffset dest, ManagedRegister src) override;
void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
- void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
-
- void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
-
- void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
-
- void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
+ void StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) override;
// Load routines.
void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
- void LoadFromThread(ManagedRegister dest,
- ThreadOffset32 src,
- size_t size) override;
-
- void LoadRef(ManagedRegister dest, FrameOffset src) override;
-
- void LoadRef(ManagedRegister dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
-
- void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override;
-
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override;
// Copying routines.
@@ -100,51 +79,7 @@ class ArmVIXLJNIMacroAssembler final
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
- void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
-
- void CopyRawPtrToThread(ThreadOffset32 thr_offs,
- FrameOffset fr_offs,
- ManagedRegister scratch) override;
-
- void CopyRef(FrameOffset dest, FrameOffset src) override;
- void CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
-
- void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
-
- void Copy(FrameOffset dest,
- ManagedRegister src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(ManagedRegister dest_base,
- Offset dest_offset,
- FrameOffset src,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(FrameOffset dest,
- FrameOffset src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(ManagedRegister dest,
- Offset dest_offset,
- ManagedRegister src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(FrameOffset dest,
- Offset dest_offset,
- FrameOffset src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
+ void Move(ManagedRegister dest, size_t value) override;
// Sign extension.
void SignExtend(ManagedRegister mreg, size_t size) override;
@@ -156,20 +91,10 @@ class ArmVIXLJNIMacroAssembler final
void GetCurrentThread(ManagedRegister dest) override;
void GetCurrentThread(FrameOffset dest_offset) override;
- // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
- // stale reference that can be used to avoid loading the spilled value to
- // see if the value is null.
- void CreateJObject(ManagedRegister out_reg,
- FrameOffset spilled_reference_offset,
- ManagedRegister in_reg,
- bool null_allowed) override;
-
- // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`.
- void CreateJObject(FrameOffset out_off,
- FrameOffset spilled_reference_offset,
- bool null_allowed) override;
+ // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path.
+ void DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) override;
// Heap::VerifyObject on src. In some cases (such as a reference to this) we
// know that src may not be null.
@@ -213,17 +138,28 @@ class ArmVIXLJNIMacroAssembler final
void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
// Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+ // Emit a conditional jump to the label if the byte loaded from the given address is not zero.
+ void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
// Code at this offset will serve as the target for the Jump call.
void Bind(JNIMacroLabel* label) override;
- void MemoryBarrier(ManagedRegister scratch) override;
-
+ private:
+ void Copy(FrameOffset dest, FrameOffset src, size_t size);
void Load(ArmManagedRegister dest, vixl32::Register base, int32_t offset, size_t size);
- private:
+ // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
+ // stale reference that can be used to avoid loading the spilled value to
+ // see if the value is null.
+ void CreateJObject(ManagedRegister out_reg,
+ FrameOffset spilled_reference_offset,
+ ManagedRegister in_reg,
+ bool null_allowed);
+
// Used for testing.
- friend class ArmVIXLAssemblerTest_VixlLoadFromOffset_Test;
- friend class ArmVIXLAssemblerTest_VixlStoreToOffset_Test;
+ ART_FRIEND_TEST(ArmVIXLAssemblerTest, VixlJniHelpers);
+ ART_FRIEND_TEST(ArmVIXLAssemblerTest, VixlLoadFromOffset);
+ ART_FRIEND_TEST(ArmVIXLAssemblerTest, VixlStoreToOffset);
};
class ArmVIXLJNIMacroLabel final
diff --git a/compiler/utils/arm/managed_register_arm.cc b/compiler/utils/arm/managed_register_arm.cc
index deff658b4f..07d50da910 100644
--- a/compiler/utils/arm/managed_register_arm.cc
+++ b/compiler/utils/arm/managed_register_arm.cc
@@ -18,7 +18,7 @@
#include "base/globals.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
// Returns true if this managed-register overlaps the other managed-register.
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index 6d942fa774..b3d436c10f 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -19,10 +19,11 @@
#include <android-base/logging.h>
+#include "base/macros.h"
#include "constants_arm.h"
#include "utils/managed_register.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
// Values for register pairs.
diff --git a/compiler/utils/arm/managed_register_arm_test.cc b/compiler/utils/arm/managed_register_arm_test.cc
index 6f440a7c81..60f6090edd 100644
--- a/compiler/utils/arm/managed_register_arm_test.cc
+++ b/compiler/utils/arm/managed_register_arm_test.cc
@@ -16,9 +16,10 @@
#include "managed_register_arm.h"
#include "base/globals.h"
+#include "base/macros.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
TEST(ArmManagedRegister, NoRegister) {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 6100ed9855..26dce7c502 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -16,7 +16,6 @@
#include "arch/arm64/instruction_set_features_arm64.h"
#include "assembler_arm64.h"
-#include "base/bit_utils_iterator.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "offsets.h"
@@ -24,7 +23,7 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
#ifdef ___
@@ -187,9 +186,7 @@ void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
}
void Arm64Assembler::GenerateMarkingRegisterCheck(Register temp, int code) {
- // The Marking Register is only used in the Baker read barrier configuration.
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(kReserveMarkingRegister);
vixl::aarch64::Register mr = reg_x(MR); // Marking Register.
vixl::aarch64::Register tr = reg_x(TR); // Thread Register.
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index b49a13a067..f8168903bd 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -23,7 +23,6 @@
#include <android-base/logging.h>
-#include "base/arena_containers.h"
#include "base/bit_utils_iterator.h"
#include "base/macros.h"
#include "dwarf/register.h"
@@ -38,7 +37,7 @@
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
class Arm64InstructionSetFeatures;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 50ca468499..9e9f122cf6 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -17,6 +17,7 @@
#include "jni_macro_assembler_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "indirect_reference_table.h"
#include "lock_word.h"
#include "managed_register_arm64.h"
#include "offsets.h"
@@ -24,7 +25,7 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
#ifdef ___
@@ -191,46 +192,22 @@ void Arm64JNIMacroAssembler::Store(ManagedRegister m_base,
}
}
-void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
- Arm64ManagedRegister src = m_src.AsArm64();
- CHECK(src.IsXRegister()) << src;
- StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP,
- offs.Int32Value());
-}
-
void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
Arm64ManagedRegister src = m_src.AsArm64();
CHECK(src.IsXRegister()) << src;
StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
}
-void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- Register scratch = temps.AcquireW();
- ___ Mov(scratch, imm);
- ___ Str(scratch, MEM_OP(reg_x(SP), offs.Int32Value()));
-}
-
-void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, FrameOffset fr_offs) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- Register scratch = temps.AcquireX();
- ___ Add(scratch, reg_x(SP), fr_offs.Int32Value());
- ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
-}
-
-void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
+void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs, bool tag_sp) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
Register scratch = temps.AcquireX();
___ Mov(scratch, reg_x(SP));
+ if (tag_sp) {
+ ___ Orr(scratch, scratch, 0x2);
+ }
___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
}
-void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off ATTRIBUTE_UNUSED,
- ManagedRegister m_source ATTRIBUTE_UNUSED,
- FrameOffset in_off ATTRIBUTE_UNUSED) {
- UNIMPLEMENTED(FATAL); // This case is not applicable to ARM64.
-}
-
// Load routines.
void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) {
if ((cond == al) || (cond == nv)) {
@@ -329,45 +306,6 @@ void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst,
return Load(m_dst.AsArm64(), m_base.AsArm64().AsXRegister(), offs.Int32Value(), size);
}
-void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
- ThreadOffset64 src,
- size_t size) {
- return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
-}
-
-void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
- Arm64ManagedRegister dst = m_dst.AsArm64();
- CHECK(dst.IsXRegister()) << dst;
- LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value());
-}
-
-void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst,
- ManagedRegister m_base,
- MemberOffset offs,
- bool unpoison_reference) {
- Arm64ManagedRegister dst = m_dst.AsArm64();
- Arm64ManagedRegister base = m_base.AsArm64();
- CHECK(dst.IsXRegister() && base.IsXRegister());
- LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
- offs.Int32Value());
- if (unpoison_reference) {
- WRegister ref_reg = dst.AsOverlappingWRegister();
- asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg));
- }
-}
-
-void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst,
- ManagedRegister m_base,
- Offset offs) {
- Arm64ManagedRegister dst = m_dst.AsArm64();
- Arm64ManagedRegister base = m_base.AsArm64();
- CHECK(dst.IsXRegister() && base.IsXRegister());
- // Remove dst and base form the temp list - higher level API uses IP1, IP0.
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister()));
- ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
-}
-
void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) {
Arm64ManagedRegister dst = m_dst.AsArm64();
CHECK(dst.IsXRegister()) << dst;
@@ -640,40 +578,10 @@ void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src,
}
}
-void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 tr_offs) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- Register scratch = temps.AcquireX();
- ___ Ldr(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
- ___ Str(scratch, MEM_OP(sp, fr_offs.Int32Value()));
-}
-
-void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
- FrameOffset fr_offs,
- ManagedRegister m_scratch) {
- Arm64ManagedRegister scratch = m_scratch.AsArm64();
- CHECK(scratch.IsXRegister()) << scratch;
- LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
- StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-}
-
-void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- Register scratch = temps.AcquireW();
- ___ Ldr(scratch, MEM_OP(reg_x(SP), src.Int32Value()));
- ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
-}
-
-void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- Register scratch = temps.AcquireW();
- ___ Ldr(scratch, MEM_OP(reg_x(base.AsArm64().AsXRegister()), offs.Int32Value()));
- if (unpoison_reference) {
- asm_.MaybeUnpoisonHeapReference(scratch);
- }
- ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, size_t value) {
+ Arm64ManagedRegister dst = m_dst.AsArm64();
+ DCHECK(dst.IsXRegister());
+ ___ Mov(reg_x(dst.AsXRegister()), value);
}
void Arm64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
@@ -684,105 +592,6 @@ void Arm64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size
___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
}
-void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
- ManagedRegister src_base,
- Offset src_offset,
- ManagedRegister m_scratch,
- size_t size) {
- Arm64ManagedRegister scratch = m_scratch.AsArm64();
- Arm64ManagedRegister base = src_base.AsArm64();
- CHECK(base.IsXRegister()) << base;
- CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
- CHECK(size == 4 || size == 8) << size;
- if (size == 4) {
- LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(),
- src_offset.Int32Value());
- StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
- } else if (size == 8) {
- LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value());
- StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
- } else {
- UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
- }
-}
-
-void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base,
- Offset dest_offs,
- FrameOffset src,
- ManagedRegister m_scratch,
- size_t size) {
- Arm64ManagedRegister scratch = m_scratch.AsArm64();
- Arm64ManagedRegister base = m_dest_base.AsArm64();
- CHECK(base.IsXRegister()) << base;
- CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
- CHECK(size == 4 || size == 8) << size;
- if (size == 4) {
- LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
- StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(),
- dest_offs.Int32Value());
- } else if (size == 8) {
- LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
- StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value());
- } else {
- UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
- }
-}
-
-void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
- FrameOffset /*src_base*/,
- Offset /*src_offset*/,
- ManagedRegister /*mscratch*/,
- size_t /*size*/) {
- UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest,
- Offset dest_offset,
- ManagedRegister m_src,
- Offset src_offset,
- ManagedRegister m_scratch,
- size_t size) {
- Arm64ManagedRegister scratch = m_scratch.AsArm64();
- Arm64ManagedRegister src = m_src.AsArm64();
- Arm64ManagedRegister dest = m_dest.AsArm64();
- CHECK(dest.IsXRegister()) << dest;
- CHECK(src.IsXRegister()) << src;
- CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch;
- CHECK(size == 4 || size == 8) << size;
- if (size == 4) {
- if (scratch.IsWRegister()) {
- LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(),
- src_offset.Int32Value());
- StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(),
- dest_offset.Int32Value());
- } else {
- LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(),
- src_offset.Int32Value());
- StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(),
- dest_offset.Int32Value());
- }
- } else if (size == 8) {
- LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value());
- StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value());
- } else {
- UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
- }
-}
-
-void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
- Offset /*dest_offset*/,
- FrameOffset /*src*/,
- Offset /*src_offset*/,
- ManagedRegister /*scratch*/,
- size_t /*size*/) {
- UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant";
-}
-
-void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
- // TODO: Should we check that m_scratch is IP? - see arm.
- ___ Dmb(InnerShareable, BarrierAll);
-}
-
void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) {
Arm64ManagedRegister reg = mreg.AsArm64();
CHECK(size == 1 || size == 2) << size;
@@ -882,6 +691,19 @@ void Arm64JNIMacroAssembler::CreateJObject(FrameOffset out_off,
___ Str(scratch, MEM_OP(reg_x(SP), out_off.Int32Value()));
}
+void Arm64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m_reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) {
+ constexpr uint64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask();
+ constexpr uint64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask();
+ constexpr size_t kGlobalOrWeakGlobalBit = WhichPowerOf2(kGlobalOrWeakGlobalMask);
+ Register reg = reg_w(m_reg.AsArm64().AsWRegister());
+ ___ Tbnz(reg.X(), kGlobalOrWeakGlobalBit, Arm64JNIMacroLabel::Cast(slow_path)->AsArm64());
+ ___ And(reg.X(), reg.X(), ~kIndirectRefKindMask);
+ ___ Cbz(reg.X(), Arm64JNIMacroLabel::Cast(resume)->AsArm64()); // Skip load for null.
+ ___ Ldr(reg, MEM_OP(reg.X()));
+}
+
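
A minimal C++ sketch of the behavior encoded by the Tbnz/And/Cbz/Ldr sequence added above, for readers following the new decode path. This is an illustration only, under the assumption that the IndirectReferenceTable mask accessors behave as their names suggest; it is not the runtime's own decoding routine, and the function name below is hypothetical.

#include <cstdint>

// Returns the decoded reference; sets *take_slow_path for (weak) global references.
uint32_t DecodeJniTransitionOrLocalRef(uint64_t ref,
                                       uint64_t global_or_weak_global_mask,
                                       uint64_t kind_mask,
                                       bool* take_slow_path) {
  if ((ref & global_or_weak_global_mask) != 0u) {
    *take_slow_path = true;   // (Weak) global references are handled in the slow path.
    return 0u;
  }
  *take_slow_path = false;
  ref &= ~kind_mask;          // Strip the indirect-reference kind bits.
  if (ref == 0u) {
    return 0u;                // Null decodes to null without a memory load.
  }
  // Transition/local references point at a spilled StackReference<Object>; load it.
  return *reinterpret_cast<const uint32_t*>(static_cast<uintptr_t>(ref));
}
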
void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) {
constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
@@ -989,7 +811,7 @@ void Arm64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCo
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
Register test_reg;
DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
- DCHECK(kUseReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// TestGcMarking() is used in the JNI stub entry when the marking register is up to date.
if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
@@ -1037,6 +859,14 @@ void Arm64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref,
}
}
+void Arm64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ Register scratch = temps.AcquireX();
+ ___ Mov(scratch, address);
+ ___ Ldrb(scratch.W(), MEM_OP(scratch, 0));
+ ___ Cbnz(scratch.W(), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+}
+
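
For reference, the new helper simply loads one byte from an absolute address and branches when it is non-zero. A hypothetical C++ equivalent of the emitted Mov/Ldrb/Cbnz sequence (illustrative, not part of the patch):

#include <cstdint>

// True when the caller should branch to the label.
inline bool ByteAtAddressIsNotZero(uintptr_t address) {
  return *reinterpret_cast<const volatile uint8_t*>(address) != 0u;
}
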
void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
CHECK(label != nullptr);
___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64());
@@ -1107,7 +937,9 @@ void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size,
asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size);
asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Emit the marking register refresh even with all GCs, since nterp still
+ // depends on the register.
+ if (kReserveMarkingRegister) {
vixl::aarch64::Register mr = reg_x(MR); // Marking Register.
vixl::aarch64::Register tr = reg_x(TR); // Thread Register.
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 2c04184848..2836e0947d 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -37,7 +37,7 @@
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> {
@@ -68,23 +68,12 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler,
// Store routines.
void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
- void StoreRef(FrameOffset dest, ManagedRegister src) override;
void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
- void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
- void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
- void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
- void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
+ void StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) override;
// Load routines.
void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
- void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override;
- void LoadRef(ManagedRegister dest, FrameOffset src) override;
- void LoadRef(ManagedRegister dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
- void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override;
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override;
// Copying routines.
@@ -92,43 +81,7 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler,
ArrayRef<ArgumentLocation> srcs,
ArrayRef<FrameOffset> refs) override;
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
- void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
- void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
- override;
- void CopyRef(FrameOffset dest, FrameOffset src) override;
- void CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
- void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
- void Copy(FrameOffset dest,
- ManagedRegister src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
- void Copy(ManagedRegister dest_base,
- Offset dest_offset,
- FrameOffset src,
- ManagedRegister scratch,
- size_t size) override;
- void Copy(FrameOffset dest,
- FrameOffset src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
- void Copy(ManagedRegister dest,
- Offset dest_offset,
- ManagedRegister src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
- void Copy(FrameOffset dest,
- Offset dest_offset,
- FrameOffset src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
- void MemoryBarrier(ManagedRegister scratch) override;
+ void Move(ManagedRegister dest, size_t value) override;
// Sign extension.
void SignExtend(ManagedRegister mreg, size_t size) override;
@@ -140,20 +93,10 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler,
void GetCurrentThread(ManagedRegister dest) override;
void GetCurrentThread(FrameOffset dest_offset) override;
- // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
- // stale reference that can be used to avoid loading the spilled value to
- // see if the value is null.
- void CreateJObject(ManagedRegister out_reg,
- FrameOffset spilled_reference_offset,
- ManagedRegister in_reg,
- bool null_allowed) override;
-
- // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`.
- void CreateJObject(FrameOffset out_off,
- FrameOffset spilled_reference_offset,
- bool null_allowed) override;
+ // Decode a JNI transition or local `jobject`. For a (weak) global `jobject`, jump to the slow path.
+ void DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) override;
// Heap::VerifyObject on src. In some cases (such as a reference to this) we
// know that src may not be null.
@@ -197,6 +140,8 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler,
void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
// Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+ // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+ void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
// Code at this offset will serve as the target for the Jump call.
void Bind(JNIMacroLabel* label) override;
@@ -220,6 +165,24 @@ class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler,
void LoadFromOffset(XRegister dest, XRegister base, int32_t offset);
void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset);
void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset);
+
+ void Copy(FrameOffset dest, FrameOffset src, size_t size);
+
+ // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
+ // stale reference that can be used to avoid loading the spilled value to
+ // see if the value is null.
+ void CreateJObject(ManagedRegister out_reg,
+ FrameOffset spilled_reference_offset,
+ ManagedRegister in_reg,
+ bool null_allowed);
+
+ // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`.
+ void CreateJObject(FrameOffset out_off,
+ FrameOffset spilled_reference_offset,
+ bool null_allowed);
+
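
The CreateJObject helpers moved to the private section keep their documented contract. As a rough illustration of that contract (not the emitted code; the names below are hypothetical), the produced value is either null or the address of the spilled reference:

#include <cstddef>
#include <cstdint>

// `maybe_stale_ref` mirrors `in_reg`; `sp` stands in for the current frame's stack pointer.
inline uintptr_t CreateJObjectValue(uintptr_t sp,
                                    size_t spilled_reference_offset,
                                    uint32_t maybe_stale_ref,
                                    bool null_allowed) {
  if (null_allowed && maybe_stale_ref == 0u) {
    return 0u;  // A null reference yields a null jobject.
  }
  return sp + spilled_reference_offset;  // Otherwise: address of the spilled StackReference<Object>.
}
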
void AddConstant(XRegister rd,
int32_t value,
vixl::aarch64::Condition cond = vixl::aarch64::al);
diff --git a/compiler/utils/arm64/managed_register_arm64.cc b/compiler/utils/arm64/managed_register_arm64.cc
index 5632265646..74a35452db 100644
--- a/compiler/utils/arm64/managed_register_arm64.cc
+++ b/compiler/utils/arm64/managed_register_arm64.cc
@@ -17,7 +17,7 @@
#include "managed_register_arm64.h"
#include "base/globals.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
// TODO: Define convention
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 8a06f631a1..7e8c976b23 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -20,9 +20,10 @@
#include <android-base/logging.h>
#include "arch/arm64/registers_arm64.h"
+#include "base/macros.h"
#include "utils/managed_register.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
const int kNumberOfXRegIds = kNumberOfXRegisters;
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index d151ac99e7..f250360639 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -18,9 +18,10 @@
#include "assembler_arm64.h"
#include "base/globals.h"
+#include "base/macros.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
TEST(Arm64ManagedRegister, NoRegister) {
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index d1d2a3d556..b82f0dc4b4 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -23,7 +23,7 @@
#include "base/globals.h"
#include "base/memory_region.h"
-namespace art {
+namespace art HIDDEN {
AssemblerBuffer::AssemblerBuffer(ArenaAllocator* allocator)
: allocator_(allocator) {
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 4b4fb14df6..13a5d9fd01 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -37,7 +37,7 @@
#include "x86/constants_x86.h"
#include "x86_64/constants_x86_64.h"
-namespace art {
+namespace art HIDDEN {
class Assembler;
class AssemblerBuffer;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index bb22fe5bde..d03e5a7abc 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -26,11 +26,12 @@
#include <fstream>
#include <iterator>
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "assembler_test_base.h"
#include "common_runtime_test.h" // For ScratchFile
-namespace art {
+namespace art HIDDEN {
// Helper for a constexpr string length.
constexpr size_t ConstexprStrLen(char const* str, size_t count = 0) {
@@ -59,7 +60,7 @@ class AssemblerTest : public AssemblerTestBase {
return assembler_.get();
}
- typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
+ using TestFn = std::string (*)(AssemblerTest *, Ass *);
void DriverFn(TestFn f, const std::string& test_name) {
DriverWrapper(f(this, assembler_.get()), test_name);
@@ -259,7 +260,7 @@ class AssemblerTest : public AssemblerTestBase {
std::string (AssemblerTest::*GetName1)(const Reg1&),
std::string (AssemblerTest::*GetName2)(const Reg2&),
std::string (AssemblerTest::*GetName3)(const Reg3&),
- std::string fmt,
+ const std::string& fmt,
int bias) {
std::string str;
std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index bf73808603..73f3657413 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -26,6 +26,7 @@
#include "android-base/strings.h"
+#include "base/macros.h"
#include "base/os.h"
#include "base/utils.h"
#include "common_runtime_test.h" // For ScratchDir.
@@ -34,7 +35,7 @@
#include "exec_utils.h"
#include "stream/file_output_stream.h"
-namespace art {
+namespace art HIDDEN {
// If you want to take a look at the differences between the ART assembler and clang,
// set this flag to true. The disassembled files will then remain in the tmp directory.
@@ -59,7 +60,7 @@ class AssemblerTestBase : public testing::Test {
// This is intended to be run as a test.
bool CheckTools() {
- for (auto cmd : { GetAssemblerCommand()[0], GetDisassemblerCommand()[0] }) {
+ for (const std::string& cmd : { GetAssemblerCommand()[0], GetDisassemblerCommand()[0] }) {
if (!OS::FileExists(cmd.c_str())) {
LOG(ERROR) << "Could not find " << cmd;
return false;
@@ -84,7 +85,7 @@ class AssemblerTestBase : public testing::Test {
// Assemble reference object file.
std::string ref_obj_file = test_path(".ref.o");
- ASSERT_TRUE(Assemble(ref_asm_file.c_str(), ref_obj_file.c_str()));
+ ASSERT_TRUE(Assemble(ref_asm_file, ref_obj_file));
// Read the code produced by assembler from the ELF file.
std::vector<uint8_t> ref_code;
@@ -153,9 +154,14 @@ class AssemblerTestBase : public testing::Test {
virtual std::vector<std::string> GetDisassemblerCommand() {
switch (GetIsa()) {
case InstructionSet::kThumb2:
- return {FindTool("llvm-objdump"), "--disassemble", "--triple", "thumbv7a-linux-gnueabi"};
+ return {FindTool("llvm-objdump"),
+ "--disassemble",
+ "--no-print-imm-hex",
+ "--triple",
+ "thumbv7a-linux-gnueabi"};
default:
- return {FindTool("llvm-objdump"), "--disassemble", "--no-show-raw-insn"};
+ return {
+ FindTool("llvm-objdump"), "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn"};
}
}
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index b2d4dcd9f6..672cd3d10f 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -30,10 +30,11 @@
#include "utils/assembler_test_base.h"
#include "base/hex_dump.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "common_runtime_test.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
// Include results file (generated manually)
@@ -143,7 +144,6 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) {
__ Load(scratch_register, FrameOffset(4092), 4);
__ Load(scratch_register, FrameOffset(4096), 4);
__ LoadRawPtrFromThread(scratch_register, ThreadOffset32(512));
- __ LoadRef(method_register, scratch_register, MemberOffset(128), /* unpoison_reference= */ false);
// Stores
__ Store(FrameOffset(32), method_register, 4);
@@ -153,19 +153,67 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) {
__ Store(FrameOffset(1024), method_register, 4);
__ Store(FrameOffset(4092), scratch_register, 4);
__ Store(FrameOffset(4096), scratch_register, 4);
- __ StoreImmediateToFrame(FrameOffset(48), 0xFF);
- __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF);
__ StoreRawPtr(FrameOffset(48), scratch_register);
- __ StoreRef(FrameOffset(48), scratch_register);
- __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48));
- __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096));
- __ StoreStackPointerToThread(ThreadOffset32(512));
+ __ StoreStackPointerToThread(ThreadOffset32(512), false);
+ __ StoreStackPointerToThread(ThreadOffset32(512), true);
+
+ // MoveArguments
+ static constexpr FrameOffset kInvalidReferenceOffset =
+ JNIMacroAssembler<kArmPointerSize>::kInvalidReferenceOffset;
+ static constexpr size_t kNativePointerSize = static_cast<size_t>(kArmPointerSize);
+ // Normal or @FastNative with parameters (Object, long, long, int, Object).
+ // Note: This shall not spill the reference R1 to [sp, #36]. The JNI compiler spills
+ // references in a separate initial pass before moving arguments and creating `jobject`s.
+ ArgumentLocation move_dests1[] = {
+ ArgumentLocation(ArmManagedRegister::FromCoreRegister(R2), kNativePointerSize),
+ ArgumentLocation(FrameOffset(0), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(8), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(16), kVRegSize),
+ ArgumentLocation(FrameOffset(20), kNativePointerSize),
+ };
+ ArgumentLocation move_srcs1[] = {
+ ArgumentLocation(ArmManagedRegister::FromCoreRegister(R1), kVRegSize),
+ ArgumentLocation(ArmManagedRegister::FromRegisterPair(R2_R3), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(48), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(56), kVRegSize),
+ ArgumentLocation(FrameOffset(60), kVRegSize),
+ };
+ FrameOffset move_refs1[] {
+ FrameOffset(36),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(60),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1),
+ ArrayRef<ArgumentLocation>(move_srcs1),
+ ArrayRef<FrameOffset>(move_refs1));
+ // @CriticalNative with parameters (long, long, long, int).
+ ArgumentLocation move_dests2[] = {
+ ArgumentLocation(ArmManagedRegister::FromRegisterPair(R0_R1), 2 * kVRegSize),
+ ArgumentLocation(ArmManagedRegister::FromRegisterPair(R2_R3), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(0), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(8), kVRegSize),
+ };
+ ArgumentLocation move_srcs2[] = {
+ ArgumentLocation(ArmManagedRegister::FromRegisterPair(R2_R3), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(28), kVRegSize),
+ ArgumentLocation(FrameOffset(32), 2 * kVRegSize),
+ ArgumentLocation(FrameOffset(40), kVRegSize),
+ };
+ FrameOffset move_refs2[] {
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ FrameOffset(kInvalidReferenceOffset),
+ };
+ __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests2),
+ ArrayRef<ArgumentLocation>(move_srcs2),
+ ArrayRef<FrameOffset>(move_refs2));
// Other
__ Call(method_register, FrameOffset(48));
__ Copy(FrameOffset(48), FrameOffset(44), 4);
- __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512));
- __ CopyRef(FrameOffset(48), FrameOffset(44));
__ GetCurrentThread(method_register);
__ GetCurrentThread(FrameOffset(48));
__ Move(hidden_arg_register, method_register, 4);
@@ -176,7 +224,6 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) {
__ CreateJObject(high_register, FrameOffset(48), high_register, true);
__ CreateJObject(high_register, FrameOffset(48), high_register, false);
__ CreateJObject(method_register, FrameOffset(48), high_register, true);
- __ CreateJObject(FrameOffset(48), FrameOffset(64), true);
__ CreateJObject(method_register, FrameOffset(0), high_register, true);
__ CreateJObject(method_register, FrameOffset(1028), high_register, true);
__ CreateJObject(high_register, FrameOffset(1028), high_register, true);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index b6c6025e41..aea7f14762 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -1,258 +1,259 @@
const char* const VixlJniHelpersResults = {
- " 0: 2d e9 e0 4d push.w {r5, r6, r7, r8, r10, r11, lr}\n"
- " 4: 2d ed 10 8a vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n"
- " 8: 81 b0 sub sp, #4\n"
- " a: 00 90 str r0, [sp]\n"
- " c: 19 91 str r1, [sp, #100]\n"
- " e: 8d ed 1a 0a vstr s0, [sp, #104]\n"
- " 12: 1b 92 str r2, [sp, #108]\n"
- " 14: 1c 93 str r3, [sp, #112]\n"
- " 16: 88 b0 sub sp, #32\n"
- " 18: ad f5 80 5d sub.w sp, sp, #4096\n"
- " 1c: 08 98 ldr r0, [sp, #32]\n"
- " 1e: 1f 98 ldr r0, [sp, #124]\n"
- " 20: 21 98 ldr r0, [sp, #132]\n"
- " 22: ff 98 ldr r0, [sp, #1020]\n"
- " 24: dd f8 00 04 ldr.w r0, [sp, #1024]\n"
- " 28: dd f8 fc cf ldr.w r12, [sp, #4092]\n"
- " 2c: 0d f5 80 5c add.w r12, sp, #4096\n"
- " 30: dc f8 00 c0 ldr.w r12, [r12]\n"
- " 34: d9 f8 00 c2 ldr.w r12, [r9, #512]\n"
- " 38: dc f8 80 00 ldr.w r0, [r12, #128]\n"
- " 3c: 08 90 str r0, [sp, #32]\n"
- " 3e: 1f 90 str r0, [sp, #124]\n"
- " 40: 21 90 str r0, [sp, #132]\n"
- " 42: ff 90 str r0, [sp, #1020]\n"
- " 44: cd f8 00 04 str.w r0, [sp, #1024]\n"
- " 48: cd f8 fc cf str.w r12, [sp, #4092]\n"
- " 4c: 4d f8 04 5d str r5, [sp, #-4]!\n"
- " 50: 0d f5 80 55 add.w r5, sp, #4096\n"
- " 54: c5 f8 04 c0 str.w r12, [r5, #4]\n"
- " 58: 5d f8 04 5b ldr r5, [sp], #4\n"
- " 5c: 4f f0 ff 0c mov.w r12, #255\n"
- " 60: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " 64: 6f f0 7f 4c mvn r12, #4278190080\n"
- " 68: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " 6c: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " 70: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " 74: 0c 90 str r0, [sp, #48]\n"
- " 76: dd f8 30 c0 ldr.w r12, [sp, #48]\n"
- " 7a: cd f8 34 c0 str.w r12, [sp, #52]\n"
- " 7e: 0d f5 80 5c add.w r12, sp, #4096\n"
- " 82: c9 f8 00 c2 str.w r12, [r9, #512]\n"
- " 86: c9 f8 00 d2 str.w sp, [r9, #512]\n"
- " 8a: d0 f8 30 e0 ldr.w lr, [r0, #48]\n"
- " 8e: f0 47 blx lr\n"
- " 90: dd f8 2c c0 ldr.w r12, [sp, #44]\n"
- " 94: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " 98: d9 f8 00 c2 ldr.w r12, [r9, #512]\n"
- " 9c: cd f8 2c c0 str.w r12, [sp, #44]\n"
- " a0: dd f8 2c c0 ldr.w r12, [sp, #44]\n"
- " a4: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " a8: 48 46 mov r0, r9\n"
- " aa: cd f8 30 90 str.w r9, [sp, #48]\n"
- " ae: 04 46 mov r4, r0\n"
- " b0: 0d f1 30 0c add.w r12, sp, #48\n"
- " b4: bb f1 00 0f cmp.w r11, #0\n"
- " b8: 18 bf it ne\n"
- " ba: e3 46 movne r11, r12\n"
- " bc: 0d f1 30 0b add.w r11, sp, #48\n"
- " c0: 5f ea 0b 00 movs.w r0, r11\n"
- " c4: 18 bf it ne\n"
- " c6: 0c a8 addne r0, sp, #48\n"
- " c8: dd f8 40 c0 ldr.w r12, [sp, #64]\n"
- " cc: bc f1 00 0f cmp.w r12, #0\n"
- " d0: 18 bf it ne\n"
- " d2: 0d f1 40 0c addne.w r12, sp, #64\n"
- " d6: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " da: 5f ea 0b 00 movs.w r0, r11\n"
- " de: 18 bf it ne\n"
- " e0: 00 a8 addne r0, sp, #0\n"
- " e2: 0d f2 04 40 addw r0, sp, #1028\n"
- " e6: bb f1 00 0f cmp.w r11, #0\n"
- " ea: 08 bf it eq\n"
- " ec: 58 46 moveq r0, r11\n"
- " ee: 0d f2 04 4c addw r12, sp, #1028\n"
- " f2: bb f1 00 0f cmp.w r11, #0\n"
- " f6: 18 bf it ne\n"
- " f8: e3 46 movne r11, r12\n"
- " fa: d9 f8 94 c0 ldr.w r12, [r9, #148]\n"
- " fe: bc f1 00 0f cmp.w r12, #0\n"
- " 102: 71 d1 bne 0x1e8 @ imm = #226\n"
- " 104: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 108: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 10c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 110: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 114: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 118: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 11c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 120: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 124: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 128: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 12c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 130: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 134: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 138: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 13c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 140: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 144: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 148: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 14c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 150: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 154: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 158: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 15c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 160: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 164: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 168: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 16c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 170: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 174: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 178: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 17c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 180: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 184: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 188: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 18c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 190: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 194: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 198: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 19c: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1a0: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1a4: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1a8: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1ac: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1b0: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1b4: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1b8: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1bc: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1c0: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1c4: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1c8: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1cc: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1d0: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1d4: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1d8: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1dc: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1e0: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1e4: 00 f0 02 b8 b.w 0x1ec @ imm = #4\n"
- " 1e8: 00 f0 1b b8 b.w 0x222 @ imm = #54\n"
- " 1ec: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1f0: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1f4: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1f8: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 1fc: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 200: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 204: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 208: cd f8 ff c7 str.w r12, [sp, #2047]\n"
- " 20c: 0d f5 80 5d add.w sp, sp, #4096\n"
- " 210: 08 b0 add sp, #32\n"
- " 212: 01 b0 add sp, #4\n"
- " 214: bd ec 10 8a vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n"
- " 218: bd e8 e0 4d pop.w {r5, r6, r7, r8, r10, r11, lr}\n"
- " 21c: d9 f8 24 80 ldr.w r8, [r9, #36]\n"
- " 220: 70 47 bx lr\n"
- " 222: d9 f8 94 00 ldr.w r0, [r9, #148]\n"
- " 226: d9 f8 c8 e2 ldr.w lr, [r9, #712]\n"
- " 22a: f0 47 blx lr\n"
+ " 0: e92d 4de0 push.w {r5, r6, r7, r8, r10, r11, lr}\n"
+ " 4: ed2d 8a10 vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n"
+ " 8: b081 sub sp, #4\n"
+ " a: 9000 str r0, [sp]\n"
+ " c: 9119 str r1, [sp, #100]\n"
+ " e: ed8d 0a1a vstr s0, [sp, #104]\n"
+ " 12: 921b str r2, [sp, #108]\n"
+ " 14: 931c str r3, [sp, #112]\n"
+ " 16: b088 sub sp, #32\n"
+ " 18: f5ad 5d80 sub.w sp, sp, #4096\n"
+ " 1c: 9808 ldr r0, [sp, #32]\n"
+ " 1e: 981f ldr r0, [sp, #124]\n"
+ " 20: 9821 ldr r0, [sp, #132]\n"
+ " 22: 98ff ldr r0, [sp, #1020]\n"
+ " 24: f8dd 0400 ldr.w r0, [sp, #1024]\n"
+ " 28: f8dd cffc ldr.w r12, [sp, #4092]\n"
+ " 2c: f50d 5c80 add.w r12, sp, #4096\n"
+ " 30: f8dc c000 ldr.w r12, [r12]\n"
+ " 34: f8d9 c200 ldr.w r12, [r9, #512]\n"
+ " 38: 9008 str r0, [sp, #32]\n"
+ " 3a: 901f str r0, [sp, #124]\n"
+ " 3c: 9021 str r0, [sp, #132]\n"
+ " 3e: 90ff str r0, [sp, #1020]\n"
+ " 40: f8cd 0400 str.w r0, [sp, #1024]\n"
+ " 44: f8cd cffc str.w r12, [sp, #4092]\n"
+ " 48: f84d 5d04 str r5, [sp, #-4]!\n"
+ " 4c: f50d 5580 add.w r5, sp, #4096\n"
+ " 50: f8c5 c004 str.w r12, [r5, #4]\n"
+ " 54: f85d 5b04 ldr r5, [sp], #4\n"
+ " 58: f8cd c030 str.w r12, [sp, #48]\n"
+ " 5c: f8c9 d200 str.w sp, [r9, #512]\n"
+ " 60: f04d 0c02 orr r12, sp, #2\n"
+ " 64: f8c9 c200 str.w r12, [r9, #512]\n"
+ " 68: a909 add r1, sp, #36\n"
+ " 6a: e9cd 2300 strd r2, r3, [sp]\n"
+ " 6e: e9dd 020c ldrd r0, r2, [sp, #48]\n"
+ " 72: e9cd 0202 strd r0, r2, [sp, #8]\n"
+ " 76: e9dd 020e ldrd r0, r2, [sp, #56]\n"
+ " 7a: 2a00 cmp r2, #0\n"
+ " 7c: bf18 it ne\n"
+ " 7e: aa0f addne r2, sp, #60\n"
+ " 80: e9cd 0204 strd r0, r2, [sp, #16]\n"
+ " 84: 460a mov r2, r1\n"
+ " 86: e9dd 0108 ldrd r0, r1, [sp, #32]\n"
+ " 8a: e9cd 0100 strd r0, r1, [sp]\n"
+ " 8e: f8dd c028 ldr.w r12, [sp, #40]\n"
+ " 92: f8cd c008 str.w r12, [sp, #8]\n"
+ " 96: 4610 mov r0, r2\n"
+ " 98: 4619 mov r1, r3\n"
+ " 9a: 9a07 ldr r2, [sp, #28]\n"
+ " 9c: 9b08 ldr r3, [sp, #32]\n"
+ " 9e: f8d0 e030 ldr.w lr, [r0, #48]\n"
+ " a2: 47f0 blx lr\n"
+ " a4: f8dd c02c ldr.w r12, [sp, #44]\n"
+ " a8: f8cd c030 str.w r12, [sp, #48]\n"
+ " ac: 4648 mov r0, r9\n"
+ " ae: f8cd 9030 str.w r9, [sp, #48]\n"
+ " b2: 4604 mov r4, r0\n"
+ " b4: f10d 0c30 add.w r12, sp, #48\n"
+ " b8: f1bb 0f00 cmp.w r11, #0\n"
+ " bc: bf18 it ne\n"
+ " be: 46e3 movne r11, r12\n"
+ " c0: f10d 0b30 add.w r11, sp, #48\n"
+ " c4: ea5f 000b movs.w r0, r11\n"
+ " c8: bf18 it ne\n"
+ " ca: a80c addne r0, sp, #48\n"
+ " cc: ea5f 000b movs.w r0, r11\n"
+ " d0: bf18 it ne\n"
+ " d2: a800 addne r0, sp, #0\n"
+ " d4: f20d 4004 addw r0, sp, #1028\n"
+ " d8: f1bb 0f00 cmp.w r11, #0\n"
+ " dc: bf08 it eq\n"
+ " de: 4658 moveq r0, r11\n"
+ " e0: f20d 4c04 addw r12, sp, #1028\n"
+ " e4: f1bb 0f00 cmp.w r11, #0\n"
+ " e8: bf18 it ne\n"
+ " ea: 46e3 movne r11, r12\n"
+ " ec: f8d9 c09c ldr.w r12, [r9, #156]\n"
+ " f0: f1bc 0f00 cmp.w r12, #0\n"
+ " f4: d16f bne 0x1d6 @ imm = #222\n"
+ " f6: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " fa: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " fe: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 102: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 106: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 10a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 10e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 112: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 116: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 11a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 11e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 122: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 126: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 12a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 12e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 132: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 136: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 13a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 13e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 142: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 146: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 14a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 14e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 152: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 156: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 15a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 15e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 162: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 166: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 16a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 16e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 172: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 176: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 17a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 17e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 182: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 186: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 18a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 18e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 192: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 196: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 19a: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 19e: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1a2: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1a6: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1aa: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1ae: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1b2: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1b6: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1ba: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1be: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1c2: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1c6: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1ca: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1ce: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1d2: f000 b803 b.w 0x1dc @ imm = #6\n"
+ " 1d6: f000 b81e b.w 0x216 @ imm = #60\n"
+ " 1da: 0000 movs r0, r0\n"
+ " 1dc: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1e0: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1e4: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1e8: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1ec: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1f0: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1f4: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1f8: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 1fc: f8cd c7ff str.w r12, [sp, #2047]\n"
+ " 200: f50d 5d80 add.w sp, sp, #4096\n"
+ " 204: b008 add sp, #32\n"
+ " 206: b001 add sp, #4\n"
+ " 208: ecbd 8a10 vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n"
+ " 20c: e8bd 4de0 pop.w {r5, r6, r7, r8, r10, r11, lr}\n"
+ " 210: f8d9 8024 ldr.w r8, [r9, #36]\n"
+ " 214: 4770 bx lr\n"
+ " 216: f8d9 009c ldr.w r0, [r9, #156]\n"
+ " 21a: f8d9 e2d0 ldr.w lr, [r9, #720]\n"
+ " 21e: 47f0 blx lr\n"
};
const char* const VixlLoadFromOffsetResults = {
- " 0: e2 68 ldr r2, [r4, #12]\n"
- " 2: d4 f8 ff 2f ldr.w r2, [r4, #4095]\n"
- " 6: 04 f5 80 52 add.w r2, r4, #4096\n"
- " a: 12 68 ldr r2, [r2]\n"
- " c: 04 f5 80 12 add.w r2, r4, #1048576\n"
- " 10: d2 f8 a4 20 ldr.w r2, [r2, #164]\n"
- " 14: 4f f4 80 52 mov.w r2, #4096\n"
- " 18: c0 f2 10 02 movt r2, #16\n"
- " 1c: 22 44 add r2, r4\n"
- " 1e: 12 68 ldr r2, [r2]\n"
- " 20: 4f f4 80 5c mov.w r12, #4096\n"
- " 24: c0 f2 10 0c movt r12, #16\n"
- " 28: 64 44 add r4, r12\n"
- " 2a: 24 68 ldr r4, [r4]\n"
- " 2c: a2 89 ldrh r2, [r4, #12]\n"
- " 2e: b4 f8 ff 2f ldrh.w r2, [r4, #4095]\n"
- " 32: 04 f5 80 52 add.w r2, r4, #4096\n"
- " 36: 12 88 ldrh r2, [r2]\n"
- " 38: 04 f5 80 12 add.w r2, r4, #1048576\n"
- " 3c: b2 f8 a4 20 ldrh.w r2, [r2, #164]\n"
- " 40: 4f f4 80 52 mov.w r2, #4096\n"
- " 44: c0 f2 10 02 movt r2, #16\n"
- " 48: 22 44 add r2, r4\n"
- " 4a: 12 88 ldrh r2, [r2]\n"
- " 4c: 4f f4 80 5c mov.w r12, #4096\n"
- " 50: c0 f2 10 0c movt r12, #16\n"
- " 54: 64 44 add r4, r12\n"
- " 56: 24 88 ldrh r4, [r4]\n"
- " 58: d4 e9 03 23 ldrd r2, r3, [r4, #12]\n"
- " 5c: d4 e9 ff 23 ldrd r2, r3, [r4, #1020]\n"
- " 60: 04 f5 80 62 add.w r2, r4, #1024\n"
- " 64: d2 e9 00 23 ldrd r2, r3, [r2]\n"
- " 68: 04 f5 80 22 add.w r2, r4, #262144\n"
- " 6c: d2 e9 29 23 ldrd r2, r3, [r2, #164]\n"
- " 70: 4f f4 80 62 mov.w r2, #1024\n"
- " 74: c0 f2 04 02 movt r2, #4\n"
- " 78: 22 44 add r2, r4\n"
- " 7a: d2 e9 00 23 ldrd r2, r3, [r2]\n"
- " 7e: 4f f4 80 6c mov.w r12, #1024\n"
- " 82: c0 f2 04 0c movt r12, #4\n"
- " 86: 64 44 add r4, r12\n"
- " 88: d4 e9 00 45 ldrd r4, r5, [r4]\n"
- " 8c: dc f8 0c 00 ldr.w r0, [r12, #12]\n"
- " 90: a4 f5 80 12 sub.w r2, r4, #1048576\n"
- " 94: d2 f8 a4 20 ldr.w r2, [r2, #164]\n"
- " 98: 94 f9 0c 20 ldrsb.w r2, [r4, #12]\n"
- " 9c: 22 7b ldrb r2, [r4, #12]\n"
- " 9e: b4 f9 0c 20 ldrsh.w r2, [r4, #12]\n"
+ " 0: 68e2 ldr r2, [r4, #12]\n"
+ " 2: f8d4 2fff ldr.w r2, [r4, #4095]\n"
+ " 6: f504 5280 add.w r2, r4, #4096\n"
+ " a: 6812 ldr r2, [r2]\n"
+ " c: f504 1280 add.w r2, r4, #1048576\n"
+ " 10: f8d2 20a4 ldr.w r2, [r2, #164]\n"
+ " 14: f44f 5280 mov.w r2, #4096\n"
+ " 18: f2c0 0210 movt r2, #16\n"
+ " 1c: 4422 add r2, r4\n"
+ " 1e: 6812 ldr r2, [r2]\n"
+ " 20: f44f 5c80 mov.w r12, #4096\n"
+ " 24: f2c0 0c10 movt r12, #16\n"
+ " 28: 4464 add r4, r12\n"
+ " 2a: 6824 ldr r4, [r4]\n"
+ " 2c: 89a2 ldrh r2, [r4, #12]\n"
+ " 2e: f8b4 2fff ldrh.w r2, [r4, #4095]\n"
+ " 32: f504 5280 add.w r2, r4, #4096\n"
+ " 36: 8812 ldrh r2, [r2]\n"
+ " 38: f504 1280 add.w r2, r4, #1048576\n"
+ " 3c: f8b2 20a4 ldrh.w r2, [r2, #164]\n"
+ " 40: f44f 5280 mov.w r2, #4096\n"
+ " 44: f2c0 0210 movt r2, #16\n"
+ " 48: 4422 add r2, r4\n"
+ " 4a: 8812 ldrh r2, [r2]\n"
+ " 4c: f44f 5c80 mov.w r12, #4096\n"
+ " 50: f2c0 0c10 movt r12, #16\n"
+ " 54: 4464 add r4, r12\n"
+ " 56: 8824 ldrh r4, [r4]\n"
+ " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n"
+ " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020]\n"
+ " 60: f504 6280 add.w r2, r4, #1024\n"
+ " 64: e9d2 2300 ldrd r2, r3, [r2]\n"
+ " 68: f504 2280 add.w r2, r4, #262144\n"
+ " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]\n"
+ " 70: f44f 6280 mov.w r2, #1024\n"
+ " 74: f2c0 0204 movt r2, #4\n"
+ " 78: 4422 add r2, r4\n"
+ " 7a: e9d2 2300 ldrd r2, r3, [r2]\n"
+ " 7e: f44f 6c80 mov.w r12, #1024\n"
+ " 82: f2c0 0c04 movt r12, #4\n"
+ " 86: 4464 add r4, r12\n"
+ " 88: e9d4 4500 ldrd r4, r5, [r4]\n"
+ " 8c: f8dc 000c ldr.w r0, [r12, #12]\n"
+ " 90: f5a4 1280 sub.w r2, r4, #1048576\n"
+ " 94: f8d2 20a4 ldr.w r2, [r2, #164]\n"
+ " 98: f994 200c ldrsb.w r2, [r4, #12]\n"
+ " 9c: 7b22 ldrb r2, [r4, #12]\n"
+ " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n"
};
const char* const VixlStoreToOffsetResults = {
- " 0: e2 60 str r2, [r4, #12]\n"
- " 2: c4 f8 ff 2f str.w r2, [r4, #4095]\n"
- " 6: 04 f5 80 5c add.w r12, r4, #4096\n"
- " a: cc f8 00 20 str.w r2, [r12]\n"
- " e: 04 f5 80 1c add.w r12, r4, #1048576\n"
- " 12: cc f8 a4 20 str.w r2, [r12, #164]\n"
- " 16: 4f f4 80 5c mov.w r12, #4096\n"
- " 1a: c0 f2 10 0c movt r12, #16\n"
- " 1e: a4 44 add r12, r4\n"
- " 20: cc f8 00 20 str.w r2, [r12]\n"
- " 24: 4f f4 80 5c mov.w r12, #4096\n"
- " 28: c0 f2 10 0c movt r12, #16\n"
- " 2c: a4 44 add r12, r4\n"
- " 2e: cc f8 00 40 str.w r4, [r12]\n"
- " 32: a2 81 strh r2, [r4, #12]\n"
- " 34: a4 f8 ff 2f strh.w r2, [r4, #4095]\n"
- " 38: 04 f5 80 5c add.w r12, r4, #4096\n"
- " 3c: ac f8 00 20 strh.w r2, [r12]\n"
- " 40: 04 f5 80 1c add.w r12, r4, #1048576\n"
- " 44: ac f8 a4 20 strh.w r2, [r12, #164]\n"
- " 48: 4f f4 80 5c mov.w r12, #4096\n"
- " 4c: c0 f2 10 0c movt r12, #16\n"
- " 50: a4 44 add r12, r4\n"
- " 52: ac f8 00 20 strh.w r2, [r12]\n"
- " 56: 4f f4 80 5c mov.w r12, #4096\n"
- " 5a: c0 f2 10 0c movt r12, #16\n"
- " 5e: a4 44 add r12, r4\n"
- " 60: ac f8 00 40 strh.w r4, [r12]\n"
- " 64: c4 e9 03 23 strd r2, r3, [r4, #12]\n"
- " 68: c4 e9 ff 23 strd r2, r3, [r4, #1020]\n"
- " 6c: 04 f5 80 6c add.w r12, r4, #1024\n"
- " 70: cc e9 00 23 strd r2, r3, [r12]\n"
- " 74: 04 f5 80 2c add.w r12, r4, #262144\n"
- " 78: cc e9 29 23 strd r2, r3, [r12, #164]\n"
- " 7c: 4f f4 80 6c mov.w r12, #1024\n"
- " 80: c0 f2 04 0c movt r12, #4\n"
- " 84: a4 44 add r12, r4\n"
- " 86: cc e9 00 23 strd r2, r3, [r12]\n"
- " 8a: 4f f4 80 6c mov.w r12, #1024\n"
- " 8e: c0 f2 04 0c movt r12, #4\n"
- " 92: a4 44 add r12, r4\n"
- " 94: cc e9 00 45 strd r4, r5, [r12]\n"
- " 98: cc f8 0c 00 str.w r0, [r12, #12]\n"
- " 9c: a4 f5 80 1c sub.w r12, r4, #1048576\n"
- " a0: cc f8 a4 20 str.w r2, [r12, #164]\n"
- " a4: 22 73 strb r2, [r4, #12]\n"
+ " 0: 60e2 str r2, [r4, #12]\n"
+ " 2: f8c4 2fff str.w r2, [r4, #4095]\n"
+ " 6: f504 5c80 add.w r12, r4, #4096\n"
+ " a: f8cc 2000 str.w r2, [r12]\n"
+ " e: f504 1c80 add.w r12, r4, #1048576\n"
+ " 12: f8cc 20a4 str.w r2, [r12, #164]\n"
+ " 16: f44f 5c80 mov.w r12, #4096\n"
+ " 1a: f2c0 0c10 movt r12, #16\n"
+ " 1e: 44a4 add r12, r4\n"
+ " 20: f8cc 2000 str.w r2, [r12]\n"
+ " 24: f44f 5c80 mov.w r12, #4096\n"
+ " 28: f2c0 0c10 movt r12, #16\n"
+ " 2c: 44a4 add r12, r4\n"
+ " 2e: f8cc 4000 str.w r4, [r12]\n"
+ " 32: 81a2 strh r2, [r4, #12]\n"
+ " 34: f8a4 2fff strh.w r2, [r4, #4095]\n"
+ " 38: f504 5c80 add.w r12, r4, #4096\n"
+ " 3c: f8ac 2000 strh.w r2, [r12]\n"
+ " 40: f504 1c80 add.w r12, r4, #1048576\n"
+ " 44: f8ac 20a4 strh.w r2, [r12, #164]\n"
+ " 48: f44f 5c80 mov.w r12, #4096\n"
+ " 4c: f2c0 0c10 movt r12, #16\n"
+ " 50: 44a4 add r12, r4\n"
+ " 52: f8ac 2000 strh.w r2, [r12]\n"
+ " 56: f44f 5c80 mov.w r12, #4096\n"
+ " 5a: f2c0 0c10 movt r12, #16\n"
+ " 5e: 44a4 add r12, r4\n"
+ " 60: f8ac 4000 strh.w r4, [r12]\n"
+ " 64: e9c4 2303 strd r2, r3, [r4, #12]\n"
+ " 68: e9c4 23ff strd r2, r3, [r4, #1020]\n"
+ " 6c: f504 6c80 add.w r12, r4, #1024\n"
+ " 70: e9cc 2300 strd r2, r3, [r12]\n"
+ " 74: f504 2c80 add.w r12, r4, #262144\n"
+ " 78: e9cc 2329 strd r2, r3, [r12, #164]\n"
+ " 7c: f44f 6c80 mov.w r12, #1024\n"
+ " 80: f2c0 0c04 movt r12, #4\n"
+ " 84: 44a4 add r12, r4\n"
+ " 86: e9cc 2300 strd r2, r3, [r12]\n"
+ " 8a: f44f 6c80 mov.w r12, #1024\n"
+ " 8e: f2c0 0c04 movt r12, #4\n"
+ " 92: 44a4 add r12, r4\n"
+ " 94: e9cc 4500 strd r4, r5, [r12]\n"
+ " 98: f8cc 000c str.w r0, [r12, #12]\n"
+ " 9c: f5a4 1c80 sub.w r12, r4, #1048576\n"
+ " a0: f8cc 20a4 str.w r2, [r12, #164]\n"
+ " a4: 7322 strb r2, [r4, #12]\n"
};
diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h
index 377b7fe352..5f68a7c701 100644
--- a/compiler/utils/atomic_dex_ref_map-inl.h
+++ b/compiler/utils/atomic_dex_ref_map-inl.h
@@ -21,12 +21,13 @@
#include <type_traits>
+#include "base/macros.h"
#include "dex/class_reference.h"
#include "dex/dex_file-inl.h"
#include "dex/method_reference.h"
#include "dex/type_reference.h"
-namespace art {
+namespace art HIDDEN {
template <typename DexFileReferenceType, typename Value>
inline size_t AtomicDexRefMap<DexFileReferenceType, Value>::NumberOfDexIndices(
diff --git a/compiler/utils/atomic_dex_ref_map.h b/compiler/utils/atomic_dex_ref_map.h
index a8c285f765..b10fef50c5 100644
--- a/compiler/utils/atomic_dex_ref_map.h
+++ b/compiler/utils/atomic_dex_ref_map.h
@@ -19,10 +19,11 @@
#include "base/atomic.h"
#include "base/dchecked_vector.h"
+#include "base/macros.h"
#include "base/safe_map.h"
#include "dex/dex_file_reference.h"
-namespace art {
+namespace art HIDDEN {
class DexFile;
diff --git a/compiler/utils/atomic_dex_ref_map_test.cc b/compiler/utils/atomic_dex_ref_map_test.cc
index 864531ed91..329735b796 100644
--- a/compiler/utils/atomic_dex_ref_map_test.cc
+++ b/compiler/utils/atomic_dex_ref_map_test.cc
@@ -18,12 +18,13 @@
#include <memory>
+#include "base/macros.h"
#include "common_runtime_test.h"
#include "dex/dex_file-inl.h"
#include "dex/method_reference.h"
#include "scoped_thread_state_change-inl.h"
-namespace art {
+namespace art HIDDEN {
class AtomicDexRefMapTest : public CommonRuntimeTest {};
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
index d4a9cc829b..db744c53f7 100644
--- a/compiler/utils/dedupe_set-inl.h
+++ b/compiler/utils/dedupe_set-inl.h
@@ -27,11 +27,12 @@
#include "android-base/stringprintf.h"
#include "base/hash_set.h"
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/stl_util.h"
#include "base/time_utils.h"
-namespace art {
+namespace art HIDDEN {
template <typename InKey,
typename StoreKey,
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index a1ba208d2c..42db8e3ca0 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -23,7 +23,7 @@
#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
class Thread;
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index b390508ed4..89385e7c82 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -21,11 +21,12 @@
#include <vector>
#include "base/array_ref.h"
+#include "base/macros.h"
#include "dedupe_set-inl.h"
#include "gtest/gtest.h"
#include "thread-current-inl.h"
-namespace art {
+namespace art HIDDEN {
class DedupeSetTestHashFunc {
public:
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
index d6d49f8faa..8b47b38e63 100644
--- a/compiler/utils/jni_macro_assembler.cc
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -35,7 +35,7 @@
#include "base/globals.h"
#include "base/memory_region.h"
-namespace art {
+namespace art HIDDEN {
using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>;
@@ -58,6 +58,7 @@ MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create(
return MacroAsm32UniquePtr(new (allocator) x86::X86JNIMacroAssembler(allocator));
#endif
default:
+ UNUSED(allocator);
LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set;
UNREACHABLE();
}
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 7022e3df92..0c729705dc 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -30,7 +30,7 @@
#include "managed_register.h"
#include "offsets.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class DebugFrameOpCodeWriterForAssembler;
@@ -118,37 +118,18 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
// Store routines
virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
virtual void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) = 0;
- virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
- virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) = 0;
-
- virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
- FrameOffset fr_offs) = 0;
-
- virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
-
- virtual void StoreSpanning(FrameOffset dest,
- ManagedRegister src,
- FrameOffset in_off) = 0;
+ // Stores the stack pointer, tagging it if required so that we can walk the stack. In debuggable
+ // runtimes the tag tells whether JITed or AOT code is currently executing. In non-debuggable
+ // runtimes JITed code is never used while AOT code is present, so checking for AOT code is enough
+ // to detect which code is being executed, and we skip the tagging to save instructions.
+ virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs, bool tag_sp) = 0;
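
A sketch of the stack-pointer tagging idea described in the comment above, assuming the tag is the bit pattern 0x2 seen in the updated Thumb2 expectations (`orr r12, sp, #2`); the constant and function names below are hypothetical:

#include <cstdint>

constexpr uintptr_t kStackPointerTag = 0x2;  // Assumed from the Thumb2 test expectations.

// Value stored at the given thread offset when tag_sp is true.
inline uintptr_t TagStackPointer(uintptr_t sp, bool tag_sp) {
  return tag_sp ? (sp | kStackPointerTag) : sp;
}

// What a stack walker would do before dereferencing the stored value.
inline uintptr_t StripStackPointerTag(uintptr_t stored_sp) {
  return stored_sp & ~kStackPointerTag;
}
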
// Load routines
virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0;
- virtual void LoadFromThread(ManagedRegister dest,
- ThreadOffset<kPointerSize> src,
- size_t size) = 0;
-
- virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
- // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
- virtual void LoadRef(ManagedRegister dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) = 0;
-
- virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
-
virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
// Copying routines
@@ -165,53 +146,7 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
- virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset<kPointerSize> thr_offs) = 0;
-
- virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
- FrameOffset fr_offs,
- ManagedRegister scratch) = 0;
-
- virtual void CopyRef(FrameOffset dest, FrameOffset src) = 0;
- virtual void CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) = 0;
-
- virtual void Copy(FrameOffset dest, FrameOffset src, size_t size) = 0;
-
- virtual void Copy(FrameOffset dest,
- ManagedRegister src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) = 0;
-
- virtual void Copy(ManagedRegister dest_base,
- Offset dest_offset,
- FrameOffset src,
- ManagedRegister scratch,
- size_t size) = 0;
-
- virtual void Copy(FrameOffset dest,
- FrameOffset src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) = 0;
-
- virtual void Copy(ManagedRegister dest,
- Offset dest_offset,
- ManagedRegister src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) = 0;
-
- virtual void Copy(FrameOffset dest,
- Offset dest_offset,
- FrameOffset src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) = 0;
-
- virtual void MemoryBarrier(ManagedRegister scratch) = 0;
+ virtual void Move(ManagedRegister dst, size_t value) = 0;
// Sign extension
virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
@@ -223,20 +158,10 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
virtual void GetCurrentThread(ManagedRegister dest) = 0;
virtual void GetCurrentThread(FrameOffset dest_offset) = 0;
- // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
- // stale reference that can be used to avoid loading the spilled value to
- // see if the value is null.
- virtual void CreateJObject(ManagedRegister out_reg,
- FrameOffset spilled_reference_offset,
- ManagedRegister in_reg,
- bool null_allowed) = 0;
-
- // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`.
- virtual void CreateJObject(FrameOffset out_off,
- FrameOffset spilled_reference_offset,
- bool null_allowed) = 0;
+ // Decode a JNI transition or local `jobject`. For a (weak) global `jobject`, jump to the slow path.
+ virtual void DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) = 0;
// Heap::VerifyObject on src. In some cases (such as a reference to this) we
// know that src may not be null.
@@ -282,6 +207,8 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
virtual void TestMarkBit(ManagedRegister ref,
JNIMacroLabel* label,
JNIMacroUnaryCondition cond) = 0;
+ // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+ virtual void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) = 0;
// Code at this offset will serve as the target for the Jump call.
virtual void Bind(JNIMacroLabel* label) = 0;
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
index e77177e43e..ac8e7d3010 100644
--- a/compiler/utils/jni_macro_assembler_test.h
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -20,6 +20,7 @@
#include "jni_macro_assembler.h"
#include "assembler_test_base.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "common_runtime_test.h" // For ScratchFile
@@ -30,7 +31,7 @@
#include <fstream>
#include <iterator>
-namespace art {
+namespace art HIDDEN {
template<typename Ass>
class JNIMacroAssemblerTest : public AssemblerTestBase {
@@ -39,7 +40,7 @@ class JNIMacroAssemblerTest : public AssemblerTestBase {
return assembler_.get();
}
- typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
+ using TestFn = std::string (*)(JNIMacroAssemblerTest *, Ass *);
void DriverFn(TestFn f, const std::string& test_name) {
DriverWrapper(f(this, assembler_.get()), test_name);
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 282500b1b7..0368d90a26 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -20,7 +20,9 @@
#include <android-base/logging.h>
#include <android-base/macros.h>
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class Assembler;
class AssemblerBuffer;
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index a3b33ba94d..ba6b46b3b3 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -20,9 +20,10 @@
#include <type_traits>
#include <vector>
+#include "base/macros.h"
#include "base/value_object.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class ArmManagedRegister;
@@ -31,6 +32,10 @@ namespace arm64 {
class Arm64ManagedRegister;
} // namespace arm64
+namespace riscv64 {
+class Riscv64ManagedRegister;
+} // namespace riscv64
+
namespace x86 {
class X86ManagedRegister;
} // namespace x86
@@ -50,6 +55,7 @@ class ManagedRegister : public ValueObject {
constexpr arm::ArmManagedRegister AsArm() const;
constexpr arm64::Arm64ManagedRegister AsArm64() const;
+ constexpr riscv64::Riscv64ManagedRegister AsRiscv64() const;
constexpr x86::X86ManagedRegister AsX86() const;
constexpr x86_64::X86_64ManagedRegister AsX86_64() const;
diff --git a/compiler/utils/riscv64/managed_register_riscv64.cc b/compiler/utils/riscv64/managed_register_riscv64.cc
new file mode 100644
index 0000000000..560019ae09
--- /dev/null
+++ b/compiler/utils/riscv64/managed_register_riscv64.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_riscv64.h"
+
+#include "base/globals.h"
+
+namespace art {
+namespace riscv64 {
+
+bool Riscv64ManagedRegister::Overlaps(const Riscv64ManagedRegister& other) const {
+ if (IsNoRegister() || other.IsNoRegister()) {
+ return false;
+ }
+ CHECK(IsValidManagedRegister());
+ CHECK(other.IsValidManagedRegister());
+
+ return Equals(other);
+}
+
+void Riscv64ManagedRegister::Print(std::ostream& os) const {
+ if (!IsValidManagedRegister()) {
+ os << "No Register";
+ } else if (IsXRegister()) {
+ os << "XRegister: " << static_cast<int>(AsXRegister());
+ } else if (IsFRegister()) {
+ os << "FRegister: " << static_cast<int>(AsFRegister());
+ } else {
+ os << "??: " << RegId();
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, const Riscv64ManagedRegister& reg) {
+ reg.Print(os);
+ return os;
+}
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/utils/riscv64/managed_register_riscv64.h b/compiler/utils/riscv64/managed_register_riscv64.h
new file mode 100644
index 0000000000..8e02a9dcc8
--- /dev/null
+++ b/compiler/utils/riscv64/managed_register_riscv64.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_RISCV64_MANAGED_REGISTER_RISCV64_H_
+#define ART_COMPILER_UTILS_RISCV64_MANAGED_REGISTER_RISCV64_H_
+
+#include <android-base/logging.h>
+
+#include "arch/riscv64/registers_riscv64.h"
+#include "base/globals.h"
+#include "base/macros.h"
+#include "utils/managed_register.h"
+
+namespace art {
+namespace riscv64 {
+
+const int kNumberOfXRegIds = kNumberOfXRegisters;
+const int kNumberOfXAllocIds = kNumberOfXRegisters;
+
+const int kNumberOfFRegIds = kNumberOfFRegisters;
+const int kNumberOfFAllocIds = kNumberOfFRegisters;
+
+const int kNumberOfRegIds = kNumberOfXRegIds + kNumberOfFRegIds;
+const int kNumberOfAllocIds = kNumberOfXAllocIds + kNumberOfFAllocIds;
+
+// Register ids map:
+// [0..R[ core registers (enum XRegister)
+// [R..F[ floating-point registers (enum FRegister)
+// where
+// R = kNumberOfXRegIds
+// F = R + kNumberOfFRegIds
+
+// An instance of class 'ManagedRegister' represents a single Riscv64 register.
+// A register can be one of the following:
+// * core register (enum XRegister)
+// * floating-point register (enum FRegister)
+//
+// 'ManagedRegister::NoRegister()' provides an invalid register.
+// There is a one-to-one mapping between ManagedRegister and register id.
+class Riscv64ManagedRegister : public ManagedRegister {
+ public:
+ constexpr XRegister AsXRegister() const {
+ CHECK(IsXRegister());
+ return static_cast<XRegister>(id_);
+ }
+
+ constexpr FRegister AsFRegister() const {
+ CHECK(IsFRegister());
+ return static_cast<FRegister>(id_ - kNumberOfXRegIds);
+ }
+
+ constexpr bool IsXRegister() const {
+ CHECK(IsValidManagedRegister());
+ return (0 <= id_) && (id_ < kNumberOfXRegIds);
+ }
+
+ constexpr bool IsFRegister() const {
+ CHECK(IsValidManagedRegister());
+ const int test = id_ - kNumberOfXRegIds;
+ return (0 <= test) && (test < kNumberOfFRegIds);
+ }
+
+ void Print(std::ostream& os) const;
+
+  // Returns true if the two managed registers ('this' and 'other') overlap.
+  // Either register may be NoRegister; if both are NoRegister,
+  // false is returned.
+ bool Overlaps(const Riscv64ManagedRegister& other) const;
+
+ static constexpr Riscv64ManagedRegister FromXRegister(XRegister r) {
+ CHECK_NE(r, kNoXRegister);
+ return FromRegId(r);
+ }
+
+ static constexpr Riscv64ManagedRegister FromFRegister(FRegister r) {
+ CHECK_NE(r, kNoFRegister);
+ return FromRegId(r + kNumberOfXRegIds);
+ }
+
+ private:
+ constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); }
+
+ constexpr int RegId() const {
+ CHECK(!IsNoRegister());
+ return id_;
+ }
+
+ int AllocId() const {
+ CHECK(IsValidManagedRegister());
+ CHECK_LT(id_, kNumberOfAllocIds);
+ return id_;
+ }
+
+ int AllocIdLow() const;
+ int AllocIdHigh() const;
+
+ friend class ManagedRegister;
+
+ explicit constexpr Riscv64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+
+ static constexpr Riscv64ManagedRegister FromRegId(int reg_id) {
+ Riscv64ManagedRegister reg(reg_id);
+ CHECK(reg.IsValidManagedRegister());
+ return reg;
+ }
+};
+
+std::ostream& operator<<(std::ostream& os, const Riscv64ManagedRegister& reg);
+
+} // namespace riscv64
+
+constexpr inline riscv64::Riscv64ManagedRegister ManagedRegister::AsRiscv64() const {
+ riscv64::Riscv64ManagedRegister reg(id_);
+ CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
+ return reg;
+}
+
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_RISCV64_MANAGED_REGISTER_RISCV64_H_
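The register-id scheme introduced in managed_register_riscv64.h above packs core and floating-point registers into one flat id space: X registers keep their own ids and F registers are offset by kNumberOfXRegIds. A minimal standalone sketch of that arithmetic follows; the register counts are assumptions here, since the real kNumberOfXRegisters/kNumberOfFRegisters come from arch/riscv64/registers_riscv64.h.

#include <cassert>

// Assumed counts for illustration only.
constexpr int kNumXRegIds = 32;
constexpr int kNumFRegIds = 32;
constexpr int kNumRegIds = kNumXRegIds + kNumFRegIds;

// Mirrors FromXRegister()/FromFRegister(): X registers map to [0, kNumXRegIds),
// F registers map to [kNumXRegIds, kNumRegIds).
constexpr int IdFromXRegister(int x) { return x; }
constexpr int IdFromFRegister(int f) { return kNumXRegIds + f; }

int main() {
  assert(IdFromXRegister(5) == 5);                // X5 keeps its own id.
  assert(IdFromFRegister(0) == kNumXRegIds);      // F0 starts right after the X range.
  assert(IdFromFRegister(31) == kNumRegIds - 1);  // Last valid id.
  return 0;
}

Because the mapping is one-to-one, Overlaps() in managed_register_riscv64.cc reduces to Equals() once both operands are valid.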
diff --git a/compiler/utils/riscv64/managed_register_riscv64_test.cc b/compiler/utils/riscv64/managed_register_riscv64_test.cc
new file mode 100644
index 0000000000..c6ad2dc38a
--- /dev/null
+++ b/compiler/utils/riscv64/managed_register_riscv64_test.cc
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_riscv64.h"
+
+#include "base/globals.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace riscv64 {
+
+TEST(Riscv64ManagedRegister, NoRegister) {
+ Riscv64ManagedRegister reg = ManagedRegister::NoRegister().AsRiscv64();
+ EXPECT_TRUE(reg.IsNoRegister());
+}
+
+TEST(Riscv64ManagedRegister, XRegister) {
+ Riscv64ManagedRegister reg = Riscv64ManagedRegister::FromXRegister(Zero);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(Zero, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(RA);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(RA, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(SP);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(SP, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(GP);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(GP, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(T0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(T0, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(T2);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(T2, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(S0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(S0, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(A0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(A0, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(A7);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(A7, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(S2);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(S2, reg.AsXRegister());
+
+ reg = Riscv64ManagedRegister::FromXRegister(T3);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsXRegister());
+ EXPECT_FALSE(reg.IsFRegister());
+ EXPECT_EQ(T3, reg.AsXRegister());
+}
+
+TEST(Riscv64ManagedRegister, FRegister) {
+ Riscv64ManagedRegister reg = Riscv64ManagedRegister::FromFRegister(FT0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FT0, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+
+ reg = Riscv64ManagedRegister::FromFRegister(FT1);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FT1, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FT1)));
+
+ reg = Riscv64ManagedRegister::FromFRegister(FS0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FS0, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FS0)));
+
+ reg = Riscv64ManagedRegister::FromFRegister(FA0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FA0, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FA0)));
+
+ reg = Riscv64ManagedRegister::FromFRegister(FA7);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FA7, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FA7)));
+
+ reg = Riscv64ManagedRegister::FromFRegister(FS4);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FS4, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FS4)));
+
+ reg = Riscv64ManagedRegister::FromFRegister(FT11);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsXRegister());
+ EXPECT_TRUE(reg.IsFRegister());
+ EXPECT_EQ(FT11, reg.AsFRegister());
+ EXPECT_TRUE(reg.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+}
+
+TEST(Riscv64ManagedRegister, Equals) {
+ ManagedRegister no_reg = ManagedRegister::NoRegister();
+ EXPECT_TRUE(no_reg.Equals(Riscv64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromXRegister(Zero)));
+ EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromXRegister(A1)));
+ EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromXRegister(S2)));
+ EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+ EXPECT_FALSE(no_reg.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+
+ Riscv64ManagedRegister reg_Zero = Riscv64ManagedRegister::FromXRegister(Zero);
+ EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::NoRegister()));
+ EXPECT_TRUE(reg_Zero.Equals(Riscv64ManagedRegister::FromXRegister(Zero)));
+ EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromXRegister(A1)));
+ EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromXRegister(S2)));
+ EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+ EXPECT_FALSE(reg_Zero.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+
+ Riscv64ManagedRegister reg_A1 = Riscv64ManagedRegister::FromXRegister(A1);
+ EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(Zero)));
+ EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(A0)));
+ EXPECT_TRUE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(A1)));
+ EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromXRegister(S2)));
+ EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+ EXPECT_FALSE(reg_A1.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+
+ Riscv64ManagedRegister reg_S2 = Riscv64ManagedRegister::FromXRegister(S2);
+ EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(Zero)));
+ EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(A1)));
+ EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(S1)));
+ EXPECT_TRUE(reg_S2.Equals(Riscv64ManagedRegister::FromXRegister(S2)));
+ EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+ EXPECT_FALSE(reg_S2.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+
+ Riscv64ManagedRegister reg_F0 = Riscv64ManagedRegister::FromFRegister(FT0);
+ EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromXRegister(Zero)));
+ EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromXRegister(A1)));
+ EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromXRegister(S2)));
+ EXPECT_TRUE(reg_F0.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+ EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromFRegister(FT1)));
+ EXPECT_FALSE(reg_F0.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+
+ Riscv64ManagedRegister reg_F31 = Riscv64ManagedRegister::FromFRegister(FT11);
+ EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromXRegister(Zero)));
+ EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromXRegister(A1)));
+ EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromXRegister(S2)));
+ EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromFRegister(FT0)));
+ EXPECT_FALSE(reg_F31.Equals(Riscv64ManagedRegister::FromFRegister(FT1)));
+ EXPECT_TRUE(reg_F31.Equals(Riscv64ManagedRegister::FromFRegister(FT11)));
+}
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
index c348f2c8ee..d0fff73df3 100644
--- a/compiler/utils/stack_checks.h
+++ b/compiler/utils/stack_checks.h
@@ -18,8 +18,9 @@
#define ART_COMPILER_UTILS_STACK_CHECKS_H_
#include "arch/instruction_set.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
// Size of a frame that we definitely consider large. Anything larger than this should
// definitely get a stack overflow check.
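The `namespace art {` to `namespace art HIDDEN {` change seen in stack_checks.h recurs throughout this patch; HIDDEN is pulled in via "base/macros.h". A plausible sketch of such a macro is shown below; the exact spelling of ART's definition is an assumption, not quoted from the source.

// Hypothetical stand-in for the macro provided by "base/macros.h".
#if defined(__GNUC__) || defined(__clang__)
#define HIDDEN __attribute__((visibility("hidden")))
#else
#define HIDDEN
#endif

namespace art HIDDEN {
// Symbols declared here default to hidden ELF visibility, so they are not
// exported from the shared library unless explicitly marked for export.
}  // namespace art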
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
deleted file mode 100644
index 6e0773bba4..0000000000
--- a/compiler/utils/swap_space.cc
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "swap_space.h"
-
-#include <sys/mman.h>
-
-#include <algorithm>
-#include <numeric>
-
-#include "base/bit_utils.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "thread-current-inl.h"
-
-namespace art {
-
-// The chunk size by which the swap file is increased and mapped.
-static constexpr size_t kMininumMapSize = 16 * MB;
-
-static constexpr bool kCheckFreeMaps = false;
-
-template <typename FreeBySizeSet>
-static void DumpFreeMap(const FreeBySizeSet& free_by_size) {
- size_t last_size = static_cast<size_t>(-1);
- for (const auto& entry : free_by_size) {
- if (last_size != entry.size) {
- last_size = entry.size;
- LOG(INFO) << "Size " << last_size;
- }
- LOG(INFO) << " 0x" << std::hex << entry.free_by_start_entry->Start()
- << " size=" << std::dec << entry.free_by_start_entry->size;
- }
-}
-
-void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
- auto free_by_start_pos = free_by_size_pos->free_by_start_entry;
- free_by_size_.erase(free_by_size_pos);
- free_by_start_.erase(free_by_start_pos);
-}
-
-inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) {
- DCHECK_NE(chunk.size, 0u);
- auto insert_result = free_by_start_.insert(chunk);
- DCHECK(insert_result.second);
- free_by_size_.emplace(chunk.size, insert_result.first);
-}
-
-SwapSpace::SwapSpace(int fd, size_t initial_size)
- : fd_(fd),
- size_(0),
- lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) {
- // Assume that the file is unlinked.
-
- InsertChunk(NewFileChunk(initial_size));
-}
-
-SwapSpace::~SwapSpace() {
- // Unmap all mmapped chunks. Nothing should be allocated anymore at
- // this point, so there should be only full size chunks in free_by_start_.
- for (const SpaceChunk& chunk : free_by_start_) {
- if (munmap(chunk.ptr, chunk.size) != 0) {
- PLOG(ERROR) << "Failed to unmap swap space chunk at "
- << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size;
- }
- }
- // All arenas are backed by the same file. Just close the descriptor.
- close(fd_);
-}
-
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static size_t CollectFree(const FreeByStartSet& free_by_start, const FreeBySizeSet& free_by_size) {
- if (free_by_start.size() != free_by_size.size()) {
- LOG(FATAL) << "Size: " << free_by_start.size() << " vs " << free_by_size.size();
- }
-
- // Calculate over free_by_size.
- size_t sum1 = 0;
- for (const auto& entry : free_by_size) {
- sum1 += entry.free_by_start_entry->size;
- }
-
- // Calculate over free_by_start.
- size_t sum2 = 0;
- for (const auto& entry : free_by_start) {
- sum2 += entry.size;
- }
-
- if (sum1 != sum2) {
- LOG(FATAL) << "Sum: " << sum1 << " vs " << sum2;
- }
- return sum1;
-}
-
-void* SwapSpace::Alloc(size_t size) {
- MutexLock lock(Thread::Current(), lock_);
- size = RoundUp(size, 8U);
-
- // Check the free list for something that fits.
- // TODO: Smarter implementation. Global biggest chunk, ...
- auto it = free_by_start_.empty()
- ? free_by_size_.end()
- : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
- if (it != free_by_size_.end()) {
- SpaceChunk old_chunk = *it->free_by_start_entry;
- if (old_chunk.size == size) {
- RemoveChunk(it);
- } else {
- // Avoid deallocating and allocating the std::set<> nodes.
- // This would be much simpler if we could use replace() from Boost.Bimap.
-
- // The free_by_start_ map contains disjoint intervals ordered by the `ptr`.
- // Shrinking the interval does not affect the ordering.
- it->free_by_start_entry->ptr += size;
- it->free_by_start_entry->size -= size;
-
- auto node = free_by_size_.extract(it);
- node.value().size -= size;
- free_by_size_.insert(std::move(node));
- }
- return old_chunk.ptr;
- } else {
- // Not a big enough free chunk, need to increase file size.
- SpaceChunk new_chunk = NewFileChunk(size);
- if (new_chunk.size != size) {
- // Insert the remainder.
- SpaceChunk remainder = { new_chunk.ptr + size, new_chunk.size - size };
- InsertChunk(remainder);
- }
- return new_chunk.ptr;
- }
-}
-
-SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
-#if !defined(__APPLE__)
- size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize));
- int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part));
- if (result != 0) {
- PLOG(FATAL) << "Unable to increase swap file.";
- }
- uint8_t* ptr = reinterpret_cast<uint8_t*>(
- mmap(nullptr, next_part, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, size_));
- if (ptr == MAP_FAILED) {
- LOG(ERROR) << "Unable to mmap new swap file chunk.";
- LOG(ERROR) << "Current size: " << size_ << " requested: " << next_part << "/" << min_size;
- LOG(ERROR) << "Free list:";
- DumpFreeMap(free_by_size_);
- LOG(ERROR) << "In free list: " << CollectFree(free_by_start_, free_by_size_);
- PLOG(FATAL) << "Unable to mmap new swap file chunk.";
- }
- size_ += next_part;
- SpaceChunk new_chunk = {ptr, next_part};
- return new_chunk;
-#else
- UNUSED(min_size, kMininumMapSize);
- LOG(FATAL) << "No swap file support on the Mac.";
- UNREACHABLE();
-#endif
-}
-
-// TODO: Full coalescing.
-void SwapSpace::Free(void* ptr, size_t size) {
- MutexLock lock(Thread::Current(), lock_);
- size = RoundUp(size, 8U);
-
- size_t free_before = 0;
- if (kCheckFreeMaps) {
- free_before = CollectFree(free_by_start_, free_by_size_);
- }
-
- SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size };
- auto it = free_by_start_.lower_bound(chunk);
- if (it != free_by_start_.begin()) {
- auto prev = it;
- --prev;
- CHECK_LE(prev->End(), chunk.Start());
- if (prev->End() == chunk.Start()) {
- // Merge *prev with this chunk.
- chunk.size += prev->size;
- chunk.ptr -= prev->size;
- auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev });
- DCHECK(erase_pos != free_by_size_.end());
- RemoveChunk(erase_pos);
- // "prev" is invalidated but "it" remains valid.
- }
- }
- if (it != free_by_start_.end()) {
- CHECK_LE(chunk.End(), it->Start());
- if (chunk.End() == it->Start()) {
- // Merge *it with this chunk.
- chunk.size += it->size;
- auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it });
- DCHECK(erase_pos != free_by_size_.end());
- RemoveChunk(erase_pos);
- // "it" is invalidated but we don't need it anymore.
- }
- }
- InsertChunk(chunk);
-
- if (kCheckFreeMaps) {
- size_t free_after = CollectFree(free_by_start_, free_by_size_);
-
- if (free_after != free_before + size) {
- DumpFreeMap(free_by_size_);
- CHECK_EQ(free_after, free_before + size) << "Should be " << size << " difference from " << free_before;
- }
- }
-}
-
-} // namespace art
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
deleted file mode 100644
index 827e9a6366..0000000000
--- a/compiler/utils/swap_space.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_SWAP_SPACE_H_
-#define ART_COMPILER_UTILS_SWAP_SPACE_H_
-
-#include <stddef.h>
-#include <stdint.h>
-#include <cstdlib>
-#include <list>
-#include <set>
-#include <vector>
-
-#include <android-base/logging.h>
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-
-namespace art {
-
-// An arena pool that creates arenas backed by an mmaped file.
-class SwapSpace {
- public:
- SwapSpace(int fd, size_t initial_size);
- ~SwapSpace();
- void* Alloc(size_t size) REQUIRES(!lock_);
- void Free(void* ptr, size_t size) REQUIRES(!lock_);
-
- size_t GetSize() {
- return size_;
- }
-
- private:
- // Chunk of space.
- struct SpaceChunk {
- // We need mutable members as we keep these objects in a std::set<> (providing only const
- // access) but we modify these members while carefully preserving the std::set<> ordering.
- mutable uint8_t* ptr;
- mutable size_t size;
-
- uintptr_t Start() const {
- return reinterpret_cast<uintptr_t>(ptr);
- }
- uintptr_t End() const {
- return reinterpret_cast<uintptr_t>(ptr) + size;
- }
- };
-
- class SortChunkByPtr {
- public:
- bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
- return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
- }
- };
-
- using FreeByStartSet = std::set<SpaceChunk, SortChunkByPtr>;
-
- // Map size to an iterator to free_by_start_'s entry.
- struct FreeBySizeEntry {
- FreeBySizeEntry(size_t sz, FreeByStartSet::const_iterator entry)
- : size(sz), free_by_start_entry(entry) { }
-
- // We need mutable members as we keep these objects in a std::set<> (providing only const
- // access) but we modify these members while carefully preserving the std::set<> ordering.
- mutable size_t size;
- mutable FreeByStartSet::const_iterator free_by_start_entry;
- };
- struct FreeBySizeComparator {
- bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const {
- if (lhs.size != rhs.size) {
- return lhs.size < rhs.size;
- } else {
- return lhs.free_by_start_entry->Start() < rhs.free_by_start_entry->Start();
- }
- }
- };
- using FreeBySizeSet = std::set<FreeBySizeEntry, FreeBySizeComparator>;
-
- SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
-
- void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_);
- void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_);
-
- int fd_;
- size_t size_;
-
- // NOTE: Boost.Bimap would be useful for the two following members.
-
- // Map start of a free chunk to its size.
- FreeByStartSet free_by_start_ GUARDED_BY(lock_);
- // Free chunks ordered by size.
- FreeBySizeSet free_by_size_ GUARDED_BY(lock_);
-
- mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- DISALLOW_COPY_AND_ASSIGN(SwapSpace);
-};
-
-template <typename T> class SwapAllocator;
-
-template <>
-class SwapAllocator<void> {
- public:
- using value_type = void;
- using pointer = void*;
- using const_pointer = const void*;
-
- template <typename U>
- struct rebind {
- using other = SwapAllocator<U>;
- };
-
- explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {}
-
- template <typename U>
- SwapAllocator(const SwapAllocator<U>& other)
- : swap_space_(other.swap_space_) {}
-
- SwapAllocator(const SwapAllocator& other) = default;
- SwapAllocator& operator=(const SwapAllocator& other) = default;
- ~SwapAllocator() = default;
-
- private:
- SwapSpace* swap_space_;
-
- template <typename U>
- friend class SwapAllocator;
-
- template <typename U>
- friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
-};
-
-template <typename T>
-class SwapAllocator {
- public:
- using value_type = T;
- using pointer = T*;
- using reference = T&;
- using const_pointer = const T*;
- using const_reference = const T&;
- using size_type = size_t;
- using difference_type = ptrdiff_t;
-
- template <typename U>
- struct rebind {
- using other = SwapAllocator<U>;
- };
-
- explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {}
-
- template <typename U>
- SwapAllocator(const SwapAllocator<U>& other)
- : swap_space_(other.swap_space_) {}
-
- SwapAllocator(const SwapAllocator& other) = default;
- SwapAllocator& operator=(const SwapAllocator& other) = default;
- ~SwapAllocator() = default;
-
- size_type max_size() const {
- return static_cast<size_type>(-1) / sizeof(T);
- }
-
- pointer address(reference x) const { return &x; }
- const_pointer address(const_reference x) const { return &x; }
-
- pointer allocate(size_type n, SwapAllocator<void>::pointer hint ATTRIBUTE_UNUSED = nullptr) {
- DCHECK_LE(n, max_size());
- if (swap_space_ == nullptr) {
- T* result = reinterpret_cast<T*>(malloc(n * sizeof(T)));
- CHECK_IMPLIES(result == nullptr, n == 0u); // Abort if malloc() fails.
- return result;
- } else {
- return reinterpret_cast<T*>(swap_space_->Alloc(n * sizeof(T)));
- }
- }
- void deallocate(pointer p, size_type n) {
- if (swap_space_ == nullptr) {
- free(p);
- } else {
- swap_space_->Free(p, n * sizeof(T));
- }
- }
-
- void construct(pointer p, const_reference val) {
- new (static_cast<void*>(p)) value_type(val);
- }
- template <class U, class... Args>
- void construct(U* p, Args&&... args) {
- ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
- }
- void destroy(pointer p) {
- p->~value_type();
- }
-
- inline bool operator==(SwapAllocator const& other) {
- return swap_space_ == other.swap_space_;
- }
- inline bool operator!=(SwapAllocator const& other) {
- return !operator==(other);
- }
-
- private:
- SwapSpace* swap_space_;
-
- template <typename U>
- friend class SwapAllocator;
-
- template <typename U>
- friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
-};
-
-template <typename T>
-inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
- return lhs.swap_space_ == rhs.swap_space_;
-}
-
-template <typename T>
-inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
- return !(lhs == rhs);
-}
-
-template <typename T>
-using SwapVector = std::vector<T, SwapAllocator<T>>;
-template <typename T, typename Comparator>
-using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_SWAP_SPACE_H_
diff --git a/compiler/utils/swap_space_test.cc b/compiler/utils/swap_space_test.cc
deleted file mode 100644
index 1650080e66..0000000000
--- a/compiler/utils/swap_space_test.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/swap_space.h"
-
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <cstdio>
-
-#include "gtest/gtest.h"
-
-#include "base/os.h"
-#include "base/unix_file/fd_file.h"
-#include "common_runtime_test.h"
-
-namespace art {
-
-class SwapSpaceTest : public CommonRuntimeTest {
-};
-
-static void SwapTest(bool use_file) {
- ScratchFile scratch;
- int fd = scratch.GetFd();
- unlink(scratch.GetFilename().c_str());
-
- SwapSpace pool(fd, 1 * MB);
- SwapAllocator<void> alloc(use_file ? &pool : nullptr);
-
- SwapVector<int32_t> v(alloc);
- v.reserve(1000000);
- for (int32_t i = 0; i < 1000000; ++i) {
- v.push_back(i);
- EXPECT_EQ(i, v[i]);
- }
-
- SwapVector<int32_t> v2(alloc);
- v2.reserve(1000000);
- for (int32_t i = 0; i < 1000000; ++i) {
- v2.push_back(i);
- EXPECT_EQ(i, v2[i]);
- }
-
- SwapVector<int32_t> v3(alloc);
- v3.reserve(500000);
- for (int32_t i = 0; i < 1000000; ++i) {
- v3.push_back(i);
- EXPECT_EQ(i, v2[i]);
- }
-
- // Verify contents.
- for (int32_t i = 0; i < 1000000; ++i) {
- EXPECT_EQ(i, v[i]);
- EXPECT_EQ(i, v2[i]);
- EXPECT_EQ(i, v3[i]);
- }
-
- scratch.Close();
-}
-
-TEST_F(SwapSpaceTest, Memory) {
- SwapTest(false);
-}
-
-TEST_F(SwapSpaceTest, Swap) {
- SwapTest(true);
-}
-
-} // namespace art
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 861b27e6af..a6b90114b2 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -21,7 +21,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index c346ba9235..0f7854dc5c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -32,7 +32,7 @@
#include "offsets.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
class Immediate : public ValueObject {
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 89c73c0ade..5da6f04402 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -17,11 +17,12 @@
#include "assembler_x86.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "base/stl_util.h"
#include "utils/assembler_test.h"
-namespace art {
+namespace art HIDDEN {
TEST(AssemblerX86, CreateBuffer) {
MallocArenaPool pool;
diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h
index 477b915bb9..0c0a7d4133 100644
--- a/compiler/utils/x86/constants_x86.h
+++ b/compiler/utils/x86/constants_x86.h
@@ -25,7 +25,7 @@
#include "base/globals.h"
#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
enum ByteRegister {
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 685f5f1b48..154e50b4e4 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -18,11 +18,12 @@
#include "base/casts.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "indirect_reference_table.h"
#include "lock_word.h"
#include "thread.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
static Register GetScratchRegister() {
@@ -165,36 +166,24 @@ void X86JNIMacroAssembler::Store(ManagedRegister mbase,
}
}
-void X86JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
- X86ManagedRegister src = msrc.AsX86();
- CHECK(src.IsCpuRegister());
- __ movl(Address(ESP, dest), src.AsCpuRegister());
-}
-
void X86JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
X86ManagedRegister src = msrc.AsX86();
CHECK(src.IsCpuRegister());
__ movl(Address(ESP, dest), src.AsCpuRegister());
}
-void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
- __ movl(Address(ESP, dest), Immediate(imm));
-}
-
-void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) {
- Register scratch = GetScratchRegister();
- __ leal(scratch, Address(ESP, fr_offs));
- __ fs()->movl(Address::Absolute(thr_offs), scratch);
-}
-
-void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
- __ fs()->movl(Address::Absolute(thr_offs), ESP);
-}
-
-void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
- ManagedRegister /*src*/,
- FrameOffset /*in_off*/) {
- UNIMPLEMENTED(FATAL); // this case only currently exists for ARM
+void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) {
+ if (tag_sp) {
+    // There is no free register; store the contents onto the stack and restore them later.
+ Register scratch = ECX;
+ __ movl(Address(ESP, -32), scratch);
+ __ movl(scratch, ESP);
+ __ orl(scratch, Immediate(0x2));
+ __ fs()->movl(Address::Absolute(thr_offs), scratch);
+ __ movl(scratch, Address(ESP, -32));
+ } else {
+ __ fs()->movl(Address::Absolute(thr_offs), ESP);
+ }
}
void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
@@ -233,61 +222,6 @@ void X86JNIMacroAssembler::Load(ManagedRegister mdest,
}
}
-void X86JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
- X86ManagedRegister dest = mdest.AsX86();
- if (dest.IsNoRegister()) {
- CHECK_EQ(0u, size);
- } else if (dest.IsCpuRegister()) {
- if (size == 1u) {
- __ fs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src));
- } else {
- CHECK_EQ(4u, size);
- __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
- }
- } else if (dest.IsRegisterPair()) {
- CHECK_EQ(8u, size);
- __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
- __ fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4)));
- } else if (dest.IsX87Register()) {
- if (size == 4) {
- __ fs()->flds(Address::Absolute(src));
- } else {
- __ fs()->fldl(Address::Absolute(src));
- }
- } else {
- CHECK(dest.IsXmmRegister());
- if (size == 4) {
- __ fs()->movss(dest.AsXmmRegister(), Address::Absolute(src));
- } else {
- __ fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src));
- }
- }
-}
-
-void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
- X86ManagedRegister dest = mdest.AsX86();
- CHECK(dest.IsCpuRegister());
- __ movl(dest.AsCpuRegister(), Address(ESP, src));
-}
-
-void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
- bool unpoison_reference) {
- X86ManagedRegister dest = mdest.AsX86();
- CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
- __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
- if (unpoison_reference) {
- __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
- }
-}
-
-void X86JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
- ManagedRegister base,
- Offset offs) {
- X86ManagedRegister dest = mdest.AsX86();
- CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
- __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-}
-
void X86JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
X86ManagedRegister dest = mdest.AsX86();
CHECK(dest.IsCpuRegister());
@@ -402,37 +336,9 @@ void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, siz
}
}
-void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
- Register scratch = GetScratchRegister();
- __ movl(scratch, Address(ESP, src));
- __ movl(Address(ESP, dest), scratch);
-}
-
-void X86JNIMacroAssembler::CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) {
- Register scratch = GetScratchRegister();
- __ movl(scratch, Address(base.AsX86().AsCpuRegister(), offs));
- if (unpoison_reference) {
- __ MaybeUnpoisonHeapReference(scratch);
- }
- __ movl(Address(ESP, dest), scratch);
-}
-
-void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
- Register scratch = GetScratchRegister();
- __ fs()->movl(scratch, Address::Absolute(thr_offs));
- __ movl(Address(ESP, fr_offs), scratch);
-}
-
-void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
- FrameOffset fr_offs,
- ManagedRegister mscratch) {
- X86ManagedRegister scratch = mscratch.AsX86();
- CHECK(scratch.IsCpuRegister());
- Load(scratch, fr_offs, 4);
- __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) {
+ X86ManagedRegister dest = mdest.AsX86();
+ __ movl(dest.AsCpuRegister(), Immediate(value));
}
void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
@@ -446,67 +352,6 @@ void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size)
}
}
-void X86JNIMacroAssembler::Copy(FrameOffset /*dst*/,
- ManagedRegister /*src_base*/,
- Offset /*src_offset*/,
- ManagedRegister /*scratch*/,
- size_t /*size*/) {
- UNIMPLEMENTED(FATAL);
-}
-
-void X86JNIMacroAssembler::Copy(ManagedRegister dest_base,
- Offset dest_offset,
- FrameOffset src,
- ManagedRegister scratch,
- size_t size) {
- CHECK(scratch.IsNoRegister());
- CHECK_EQ(size, 4u);
- __ pushl(Address(ESP, src));
- __ popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86JNIMacroAssembler::Copy(FrameOffset dest,
- FrameOffset src_base,
- Offset src_offset,
- ManagedRegister mscratch,
- size_t size) {
- Register scratch = mscratch.AsX86().AsCpuRegister();
- CHECK_EQ(size, 4u);
- __ movl(scratch, Address(ESP, src_base));
- __ movl(scratch, Address(scratch, src_offset));
- __ movl(Address(ESP, dest), scratch);
-}
-
-void X86JNIMacroAssembler::Copy(ManagedRegister dest,
- Offset dest_offset,
- ManagedRegister src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) {
- CHECK_EQ(size, 4u);
- CHECK(scratch.IsNoRegister());
- __ pushl(Address(src.AsX86().AsCpuRegister(), src_offset));
- __ popl(Address(dest.AsX86().AsCpuRegister(), dest_offset));
-}
-
-void X86JNIMacroAssembler::Copy(FrameOffset dest,
- Offset dest_offset,
- FrameOffset src,
- Offset src_offset,
- ManagedRegister mscratch,
- size_t size) {
- Register scratch = mscratch.AsX86().AsCpuRegister();
- CHECK_EQ(size, 4u);
- CHECK_EQ(dest.Int32Value(), src.Int32Value());
- __ movl(scratch, Address(ESP, src));
- __ pushl(Address(scratch, src_offset));
- __ popl(Address(scratch, dest_offset));
-}
-
-void X86JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
- __ mfence();
-}
-
void X86JNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
FrameOffset spilled_reference_offset,
ManagedRegister min_reg,
@@ -547,6 +392,20 @@ void X86JNIMacroAssembler::CreateJObject(FrameOffset out_off,
__ movl(Address(ESP, out_off), scratch);
}
+void X86JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) {
+ constexpr uint32_t kGlobalOrWeakGlobalMask =
+ dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetGlobalOrWeakGlobalMask());
+ constexpr uint32_t kIndirectRefKindMask =
+ dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetIndirectRefKindMask());
+ __ testl(reg.AsX86().AsCpuRegister(), Immediate(kGlobalOrWeakGlobalMask));
+ __ j(kNotZero, X86JNIMacroLabel::Cast(slow_path)->AsX86());
+ __ andl(reg.AsX86().AsCpuRegister(), Immediate(~kIndirectRefKindMask));
+ __ j(kZero, X86JNIMacroLabel::Cast(resume)->AsX86()); // Skip load for null.
+ __ movl(reg.AsX86().AsCpuRegister(), Address(reg.AsX86().AsCpuRegister(), /*disp=*/ 0));
+}
+
void X86JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
// TODO: not validating references
}
@@ -724,6 +583,12 @@ void X86JNIMacroAssembler::TestMarkBit(ManagedRegister mref,
__ j(UnaryConditionToX86Condition(cond), X86JNIMacroLabel::Cast(label)->AsX86());
}
+
+void X86JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+ __ cmpb(Address::Absolute(address), Immediate(0));
+ __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86());
+}
+
void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) {
CHECK(label != nullptr);
__ Bind(X86JNIMacroLabel::Cast(label)->AsX86());
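The new tag_sp path in X86JNIMacroAssembler::StoreStackPointerToThread() above ORs 0x2 into the stack-pointer value before publishing it to the thread, so a reader can tell a tagged SP from a plain one. A small sketch of the reader side, assuming bit 1 is the only tag bit (the helper names here are hypothetical):

#include <cstdint>

constexpr uintptr_t kTaggedSpBit = 0x2;  // Mirrors the Immediate(0x2) OR'ed in above.

// Hypothetical reader-side helpers.
inline bool IsTaggedSp(uintptr_t stored_sp) { return (stored_sp & kTaggedSpBit) != 0u; }
inline uintptr_t StripSpTag(uintptr_t stored_sp) { return stored_sp & ~kTaggedSpBit; }

Note that the 32-bit path has no free scratch register at this point, so it spills ECX below ESP and restores it afterwards, whereas the x86-64 version later in this patch can simply use its scratch register.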
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 29fccfd386..6b177f533b 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -27,7 +27,7 @@
#include "offsets.h"
#include "utils/jni_macro_assembler.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
class X86JNIMacroLabel;
@@ -59,30 +59,14 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi
// Store routines
void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
- void StoreRef(FrameOffset dest, ManagedRegister src) override;
void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
- void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
-
- void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
-
- void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
-
- void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
+ void StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) override;
// Load routines
void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
- void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) override;
-
- void LoadRef(ManagedRegister dest, FrameOffset src) override;
-
- void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
- bool unpoison_reference) override;
-
- void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override;
-
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override;
// Copying routines
@@ -92,35 +76,7 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
- void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
-
- void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
- override;
-
- void CopyRef(FrameOffset dest, FrameOffset src) override;
- void CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
-
- void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
-
- void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
- size_t size) override;
-
- void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch,
- size_t size) override;
-
- void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch,
- size_t size) override;
-
- void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
- ManagedRegister scratch, size_t size) override;
-
- void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
- ManagedRegister scratch, size_t size) override;
-
- void MemoryBarrier(ManagedRegister) override;
+ void Move(ManagedRegister dest, size_t value) override;
// Sign extension
void SignExtend(ManagedRegister mreg, size_t size) override;
@@ -132,20 +88,10 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi
void GetCurrentThread(ManagedRegister dest) override;
void GetCurrentThread(FrameOffset dest_offset) override;
- // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
- // stale reference that can be used to avoid loading the spilled value to
- // see if the value is null.
- void CreateJObject(ManagedRegister out_reg,
- FrameOffset spilled_reference_offset,
- ManagedRegister in_reg,
- bool null_allowed) override;
-
- // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`.
- void CreateJObject(FrameOffset out_off,
- FrameOffset spilled_reference_offset,
- bool null_allowed) override;
+ // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path.
+ void DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) override;
// Heap::VerifyObject on src. In some cases (such as a reference to this) we
// know that src may not be null.
@@ -189,10 +135,29 @@ class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, Poi
void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
// Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+  // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+ void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
// Code at this offset will serve as the target for the Jump call.
void Bind(JNIMacroLabel* label) override;
private:
+ void Copy(FrameOffset dest, FrameOffset src, size_t size);
+
+ // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
+ // stale reference that can be used to avoid loading the spilled value to
+ // see if the value is null.
+ void CreateJObject(ManagedRegister out_reg,
+ FrameOffset spilled_reference_offset,
+ ManagedRegister in_reg,
+ bool null_allowed);
+
+ // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`.
+ void CreateJObject(FrameOffset out_off,
+ FrameOffset spilled_reference_offset,
+ bool null_allowed);
+
DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler);
};
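In the header above, CreateJObject() moves to the private section and the new DecodeJNITransitionOrLocalJObject() becomes the public entry point for going back from a `jobject` to a raw reference, with (weak) global references sent to a slow path. A C++ rendering of the emitted sequence, assuming the reference kind sits in the low bits as the two masks suggest; the mask values below are illustrative, not the runtime's actual constants:

#include <cstdint>

// Illustrative mask values; the real ones come from IndirectReferenceTable.
constexpr uintptr_t kGlobalOrWeakGlobalMask = 0x2;
constexpr uintptr_t kIndirectRefKindMask = 0x3;

// Mirrors the emitted testl/andl/movl sequence; `slow_path` stands in for the
// jump taken for (weak) global references.
template <typename SlowPathFn>
uintptr_t DecodeJniTransitionOrLocal(uintptr_t ref, SlowPathFn slow_path) {
  if ((ref & kGlobalOrWeakGlobalMask) != 0u) {
    return slow_path(ref);                         // j(kNotZero, slow_path)
  }
  ref &= ~kIndirectRefKindMask;                    // Clear the kind bits.
  if (ref == 0u) {
    return 0u;                                     // j(kZero, resume): null needs no load.
  }
  return *reinterpret_cast<const uint32_t*>(ref);  // The real code loads a 32-bit heap reference.
}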
diff --git a/compiler/utils/x86/managed_register_x86.cc b/compiler/utils/x86/managed_register_x86.cc
index cc7cedf93e..bef948056f 100644
--- a/compiler/utils/x86/managed_register_x86.cc
+++ b/compiler/utils/x86/managed_register_x86.cc
@@ -18,7 +18,7 @@
#include "base/globals.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
// Define register pairs.
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 27555bfd32..def4f68b27 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_
#define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_
+#include "base/macros.h"
#include "constants_x86.h"
#include "utils/managed_register.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
// Values for register pairs.
diff --git a/compiler/utils/x86/managed_register_x86_test.cc b/compiler/utils/x86/managed_register_x86_test.cc
index 28af5313c7..9f5e1970ac 100644
--- a/compiler/utils/x86/managed_register_x86_test.cc
+++ b/compiler/utils/x86/managed_register_x86_test.cc
@@ -17,9 +17,10 @@
#include "managed_register_x86.h"
#include "base/globals.h"
+#include "base/macros.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
TEST(X86ManagedRegister, NoRegister) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 21a44810ba..3fdf05bed9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -21,7 +21,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index ea944c200e..235ea03e2b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -30,9 +30,8 @@
#include "managed_register_x86_64.h"
#include "offsets.h"
#include "utils/assembler.h"
-#include "utils/jni_macro_assembler.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// Encodes an immediate value for operands.
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index f7e890d112..a7c206afaa 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -21,13 +21,14 @@
#include <random>
#include "base/bit_utils.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "base/stl_util.h"
#include "jni_macro_assembler_x86_64.h"
#include "utils/assembler_test.h"
#include "utils/jni_macro_assembler_test.h"
-namespace art {
+namespace art HIDDEN {
TEST(AssemblerX86_64, CreateBuffer) {
MallocArenaPool pool;
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 301c8fc09b..52ac987766 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -25,7 +25,7 @@
#include "base/globals.h"
#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
class CpuRegister {
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index d5d1bbadc9..388845730e 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -19,10 +19,11 @@
#include "base/casts.h"
#include "base/memory_region.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "indirect_reference_table.h"
#include "lock_word.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
static dwarf::Reg DWARFReg(Register reg) {
@@ -194,37 +195,21 @@ void X86_64JNIMacroAssembler::Store(ManagedRegister mbase,
}
}
-void X86_64JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
- X86_64ManagedRegister src = msrc.AsX86_64();
- CHECK(src.IsCpuRegister());
- __ movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
-}
-
void X86_64JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
X86_64ManagedRegister src = msrc.AsX86_64();
CHECK(src.IsCpuRegister());
__ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
}
-void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
- __ movl(Address(CpuRegister(RSP), dest), Immediate(imm)); // TODO(64) movq?
-}
-
-void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
- FrameOffset fr_offs) {
- CpuRegister scratch = GetScratchRegister();
- __ leaq(scratch, Address(CpuRegister(RSP), fr_offs));
- __ gs()->movq(Address::Absolute(thr_offs, true), scratch);
-}
-
-void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
- __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
-}
-
-void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
- ManagedRegister /*src*/,
- FrameOffset /*in_off*/) {
- UNIMPLEMENTED(FATAL); // this case only currently exists for ARM
+void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) {
+ if (tag_sp) {
+ CpuRegister reg = GetScratchRegister();
+ __ movq(reg, CpuRegister(RSP));
+ __ orq(reg, Immediate(0x2));
+ __ gs()->movq(Address::Absolute(thr_offs, true), reg);
+ } else {
+ __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
+ }
}
void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
@@ -263,67 +248,6 @@ void X86_64JNIMacroAssembler::Load(ManagedRegister mdest,
}
}
-void X86_64JNIMacroAssembler::LoadFromThread(ManagedRegister mdest,
- ThreadOffset64 src, size_t size) {
- X86_64ManagedRegister dest = mdest.AsX86_64();
- if (dest.IsNoRegister()) {
- CHECK_EQ(0u, size);
- } else if (dest.IsCpuRegister()) {
- if (size == 1u) {
- __ gs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src, true));
- } else {
- CHECK_EQ(4u, size);
- __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
- }
- } else if (dest.IsRegisterPair()) {
- CHECK_EQ(8u, size);
- __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
- } else if (dest.IsX87Register()) {
- if (size == 4) {
- __ gs()->flds(Address::Absolute(src, true));
- } else {
- __ gs()->fldl(Address::Absolute(src, true));
- }
- } else {
- CHECK(dest.IsXmmRegister());
- if (size == 4) {
- __ gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
- } else {
- __ gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
- }
- }
-}
-
-void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
- X86_64ManagedRegister dest = mdest.AsX86_64();
- CHECK(dest.IsCpuRegister());
- __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
-}
-
-void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest,
- ManagedRegister mbase,
- MemberOffset offs,
- bool unpoison_reference) {
- X86_64ManagedRegister base = mbase.AsX86_64();
- X86_64ManagedRegister dest = mdest.AsX86_64();
- CHECK(base.IsCpuRegister());
- CHECK(dest.IsCpuRegister());
- __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
- if (unpoison_reference) {
- __ MaybeUnpoisonHeapReference(dest.AsCpuRegister());
- }
-}
-
-void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest,
- ManagedRegister mbase,
- Offset offs) {
- X86_64ManagedRegister base = mbase.AsX86_64();
- X86_64ManagedRegister dest = mdest.AsX86_64();
- CHECK(base.IsCpuRegister());
- CHECK(dest.IsCpuRegister());
- __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
-}
-
void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
X86_64ManagedRegister dest = mdest.AsX86_64();
CHECK(dest.IsCpuRegister());
@@ -477,37 +401,10 @@ void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc,
}
}
-void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
- CpuRegister scratch = GetScratchRegister();
- __ movl(scratch, Address(CpuRegister(RSP), src));
- __ movl(Address(CpuRegister(RSP), dest), scratch);
-}
-
-void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) {
- CpuRegister scratch = GetScratchRegister();
- __ movl(scratch, Address(base.AsX86_64().AsCpuRegister(), offs));
- if (unpoison_reference) {
- __ MaybeUnpoisonHeapReference(scratch);
- }
- __ movl(Address(CpuRegister(RSP), dest), scratch);
-}
-
-void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) {
- CpuRegister scratch = GetScratchRegister();
- __ gs()->movq(scratch, Address::Absolute(thr_offs, true));
- __ movq(Address(CpuRegister(RSP), fr_offs), scratch);
-}
-void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
- FrameOffset fr_offs,
- ManagedRegister mscratch) {
- X86_64ManagedRegister scratch = mscratch.AsX86_64();
- CHECK(scratch.IsCpuRegister());
- Load(scratch, fr_offs, 8);
- __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) {
+ X86_64ManagedRegister dest = mdest.AsX86_64();
+ __ movq(dest.AsCpuRegister(), Immediate(value));
}
void X86_64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
@@ -522,67 +419,6 @@ void X86_64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t siz
}
}
-void X86_64JNIMacroAssembler::Copy(FrameOffset /*dst*/,
- ManagedRegister /*src_base*/,
- Offset /*src_offset*/,
- ManagedRegister /*scratch*/,
- size_t /*size*/) {
- UNIMPLEMENTED(FATAL);
-}
-
-void X86_64JNIMacroAssembler::Copy(ManagedRegister dest_base,
- Offset dest_offset,
- FrameOffset src,
- ManagedRegister scratch,
- size_t size) {
- CHECK(scratch.IsNoRegister());
- CHECK_EQ(size, 4u);
- __ pushq(Address(CpuRegister(RSP), src));
- __ popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
- FrameOffset src_base,
- Offset src_offset,
- ManagedRegister mscratch,
- size_t size) {
- CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
- CHECK_EQ(size, 4u);
- __ movq(scratch, Address(CpuRegister(RSP), src_base));
- __ movq(scratch, Address(scratch, src_offset));
- __ movq(Address(CpuRegister(RSP), dest), scratch);
-}
-
-void X86_64JNIMacroAssembler::Copy(ManagedRegister dest,
- Offset dest_offset,
- ManagedRegister src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) {
- CHECK_EQ(size, 4u);
- CHECK(scratch.IsNoRegister());
- __ pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
- __ popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
-}
-
-void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
- Offset dest_offset,
- FrameOffset src,
- Offset src_offset,
- ManagedRegister mscratch,
- size_t size) {
- CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
- CHECK_EQ(size, 4u);
- CHECK_EQ(dest.Int32Value(), src.Int32Value());
- __ movq(scratch, Address(CpuRegister(RSP), src));
- __ pushq(Address(scratch, src_offset));
- __ popq(Address(scratch, dest_offset));
-}
-
-void X86_64JNIMacroAssembler::MemoryBarrier(ManagedRegister) {
- __ mfence();
-}
-
void X86_64JNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
FrameOffset spilled_reference_offset,
ManagedRegister min_reg,
@@ -629,6 +465,19 @@ void X86_64JNIMacroAssembler::CreateJObject(FrameOffset out_off,
__ movq(Address(CpuRegister(RSP), out_off), scratch);
}
+void X86_64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) {
+ constexpr uint64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask();
+ constexpr uint64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask();
+ // TODO: Add `testq()` with `imm32` to assembler to avoid using 64-bit pointer as 32-bit value.
+ __ testl(reg.AsX86_64().AsCpuRegister(), Immediate(kGlobalOrWeakGlobalMask));
+ __ j(kNotZero, X86_64JNIMacroLabel::Cast(slow_path)->AsX86_64());
+ __ andq(reg.AsX86_64().AsCpuRegister(), Immediate(~kIndirectRefKindMask));
+ __ j(kZero, X86_64JNIMacroLabel::Cast(resume)->AsX86_64()); // Skip load for null.
+ __ movl(reg.AsX86_64().AsCpuRegister(), Address(reg.AsX86_64().AsCpuRegister(), /*disp=*/ 0));
+}
+
void X86_64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
// TODO: not validating references
}
@@ -803,6 +652,13 @@ void X86_64JNIMacroAssembler::TestMarkBit(ManagedRegister mref,
__ j(UnaryConditionToX86_64Condition(cond), X86_64JNIMacroLabel::Cast(label)->AsX86_64());
}
+void X86_64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+ CpuRegister scratch = GetScratchRegister();
+ __ movq(scratch, Immediate(address));
+ __ cmpb(Address(scratch, 0), Immediate(0));
+ __ j(kNotZero, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+}
+
void X86_64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
CHECK(label != nullptr);
__ Bind(X86_64JNIMacroLabel::Cast(label)->AsX86_64());
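A minimal plain-C++ sketch of the check that the DecodeJNITransitionOrLocalJObject hunk above emits, under assumed placeholder mask values (the real constants come from IndirectReferenceTable::GetGlobalOrWeakGlobalMask() and IndirectReferenceTable::GetIndirectRefKindMask()): a (weak) global reference takes the slow path, null skips the load, and anything else is treated as a direct StackReference address once the kind bits are cleared.

#include <cstdint>

// Placeholder mask values for illustration only; ART derives the real ones
// from IndirectReferenceTable. The branch structure mirrors the emitted
// testl/j(kNotZero), andq, j(kZero), movl sequence.
constexpr uint64_t kGlobalOrWeakGlobalMask = 0x2u;
constexpr uint64_t kIndirectRefKindMask = 0x3u;

enum class DecodeResult { kSlowPath, kNull, kDecoded };

DecodeResult DecodeJniTransitionOrLocalRef(uint64_t ref, uint32_t* out) {
  if ((ref & kGlobalOrWeakGlobalMask) != 0) {   // testl + j(kNotZero, slow_path)
    return DecodeResult::kSlowPath;             // (weak) global: handled elsewhere
  }
  ref &= ~kIndirectRefKindMask;                 // andq with ~kIndirectRefKindMask
  if (ref == 0) {                               // j(kZero, resume): skip load for null
    *out = 0u;
    return DecodeResult::kNull;
  }
  *out = *reinterpret_cast<const uint32_t*>(ref);  // movl (reg), reg
  return DecodeResult::kDecoded;
}

int main() {
  uint32_t spilled_reference = 42u;  // stand-in for a spilled StackReference slot
  uint64_t ref = reinterpret_cast<uint64_t>(&spilled_reference);  // kind bits are zero here
  uint32_t decoded = 0u;
  bool ok = DecodeJniTransitionOrLocalRef(ref, &decoded) == DecodeResult::kDecoded &&
            decoded == 42u;
  return ok ? 0 : 1;
}
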
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index e080f0b3df..da0aef9869 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -28,7 +28,7 @@
#include "utils/assembler.h"
#include "utils/jni_macro_assembler.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assembler,
@@ -60,32 +60,14 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble
// Store routines
void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
- void StoreRef(FrameOffset dest, ManagedRegister src) override;
void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
- void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
-
- void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
-
- void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
-
- void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
+ void StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) override;
// Load routines
void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
- void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override;
-
- void LoadRef(ManagedRegister dest, FrameOffset src) override;
-
- void LoadRef(ManagedRegister dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
-
- void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override;
-
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override;
// Copying routines
@@ -95,52 +77,7 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
- void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
-
- void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
- override;
-
- void CopyRef(FrameOffset dest, FrameOffset src) override;
- void CopyRef(FrameOffset dest,
- ManagedRegister base,
- MemberOffset offs,
- bool unpoison_reference) override;
-
- void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
-
- void Copy(FrameOffset dest,
- ManagedRegister src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(ManagedRegister dest_base,
- Offset dest_offset,
- FrameOffset src,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(FrameOffset dest,
- FrameOffset src_base,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(ManagedRegister dest,
- Offset dest_offset,
- ManagedRegister src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void Copy(FrameOffset dest,
- Offset dest_offset,
- FrameOffset src,
- Offset src_offset,
- ManagedRegister scratch,
- size_t size) override;
-
- void MemoryBarrier(ManagedRegister) override;
+ void Move(ManagedRegister dest, size_t value) override;
// Sign extension
void SignExtend(ManagedRegister mreg, size_t size) override;
@@ -152,20 +89,10 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble
void GetCurrentThread(ManagedRegister dest) override;
void GetCurrentThread(FrameOffset dest_offset) override;
- // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
- // stale reference that can be used to avoid loading the spilled value to
- // see if the value is null.
- void CreateJObject(ManagedRegister out_reg,
- FrameOffset spilled_reference_offset,
- ManagedRegister in_reg,
- bool null_allowed) override;
-
- // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
- // or to be null if the value is null and `null_allowed`.
- void CreateJObject(FrameOffset out_off,
- FrameOffset spilled_reference_offset,
- bool null_allowed) override;
+ // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path.
+ void DecodeJNITransitionOrLocalJObject(ManagedRegister reg,
+ JNIMacroLabel* slow_path,
+ JNIMacroLabel* resume) override;
// Heap::VerifyObject on src. In some cases (such as a reference to this) we
// know that src may not be null.
@@ -209,10 +136,29 @@ class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assemble
void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
// Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+ // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+ void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
// Code at this offset will serve as the target for the Jump call.
void Bind(JNIMacroLabel* label) override;
private:
+ void Copy(FrameOffset dest, FrameOffset src, size_t size);
+
+ // Set up `out_reg` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`. `in_reg` holds a possibly
+ // stale reference that can be used to avoid loading the spilled value to
+ // see if the value is null.
+ void CreateJObject(ManagedRegister out_reg,
+ FrameOffset spilled_reference_offset,
+ ManagedRegister in_reg,
+ bool null_allowed);
+
+ // Set up `out_off` to hold a `jobject` (`StackReference<Object>*` to a spilled value),
+ // or to be null if the value is null and `null_allowed`.
+ void CreateJObject(FrameOffset out_off,
+ FrameOffset spilled_reference_offset,
+ bool null_allowed);
+
DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler);
};
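And a plain-C++ analogue of the new TestByteAndJumpIfNotZero() helper declared above, spelling out what the movq/cmpb/jnz sequence in the .cc hunk tests at runtime; the flag variable below is a hypothetical stand-in for whatever byte the generated stub actually polls.

#include <cstdint>

// The emitted code moves the 64-bit address into a scratch register, compares
// the byte at that address with zero, and takes the jump when it is non-zero.
inline bool ByteAtAddressIsNonZero(uintptr_t address) {
  return *reinterpret_cast<const volatile uint8_t*>(address) != 0;
}

int main() {
  uint8_t flag = 1;  // hypothetical flag byte; real stubs receive a runtime-provided address
  return ByteAtAddressIsNonZero(reinterpret_cast<uintptr_t>(&flag)) ? 0 : 1;
}
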
diff --git a/compiler/utils/x86_64/managed_register_x86_64.cc b/compiler/utils/x86_64/managed_register_x86_64.cc
index c0eec9d86c..75ff8aaf1d 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.cc
+++ b/compiler/utils/x86_64/managed_register_x86_64.cc
@@ -18,7 +18,7 @@
#include "base/globals.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// Define register pairs.
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index 62c0e373a7..7a1be0bd8f 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
#define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
+#include "base/macros.h"
#include "constants_x86_64.h"
#include "utils/managed_register.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// Values for register pairs.
diff --git a/compiler/utils/x86_64/managed_register_x86_64_test.cc b/compiler/utils/x86_64/managed_register_x86_64_test.cc
index 46a405ffaf..048268bf9b 100644
--- a/compiler/utils/x86_64/managed_register_x86_64_test.cc
+++ b/compiler/utils/x86_64/managed_register_x86_64_test.cc
@@ -16,9 +16,10 @@
#include "managed_register_x86_64.h"
#include "base/globals.h"
+#include "base/macros.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
TEST(X86_64ManagedRegister, NoRegister) {