Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp | 4
-rw-r--r--  compiler/common_compiler_test.cc | 2
-rw-r--r--  compiler/compiled_method.h | 48
-rw-r--r--  compiler/compiled_method_test.cc | 108
-rw-r--r--  compiler/compiler.h | 4
-rw-r--r--  compiler/debug/elf_debug_info_writer.h | 4
-rw-r--r--  compiler/debug/elf_debug_loc_writer.h | 2
-rw-r--r--  compiler/debug/elf_debug_writer.cc | 1
-rw-r--r--  compiler/debug/elf_debug_writer.h | 2
-rw-r--r--  compiler/dex/dex_to_dex_compiler.cc | 2
-rw-r--r--  compiler/dex/inline_method_analyser.cc | 7
-rw-r--r--  compiler/dex/verification_results.cc | 12
-rw-r--r--  compiler/dex/verified_method.cc | 13
-rw-r--r--  compiler/dex/verified_method.h | 6
-rw-r--r--  compiler/driver/compiled_method_storage.cc | 2
-rw-r--r--  compiler/driver/compiled_method_storage_test.cc | 4
-rw-r--r--  compiler/driver/compiler_driver-inl.h | 8
-rw-r--r--  compiler/driver/compiler_driver.cc | 351
-rw-r--r--  compiler/driver/compiler_driver.h | 49
-rw-r--r--  compiler/driver/compiler_driver_test.cc | 20
-rw-r--r--  compiler/driver/compiler_options.cc | 2
-rw-r--r--  compiler/driver/compiler_options.h | 6
-rw-r--r--  compiler/elf_writer_quick.cc | 2
-rw-r--r--  compiler/image_test.cc | 483
-rw-r--r--  compiler/image_test.h | 497
-rw-r--r--  compiler/image_write_read_test.cc (renamed from compiler/compiled_class.h) | 31
-rw-r--r--  compiler/image_writer.h | 7
-rw-r--r--  compiler/intrinsics_list.h | 6
-rw-r--r--  compiler/jit/jit_compiler.cc | 5
-rw-r--r--  compiler/linker/arm/relative_patcher_arm_base.cc | 50
-rw-r--r--  compiler/linker/arm/relative_patcher_arm_base.h | 63
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2.cc | 316
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2.h | 90
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2_test.cc | 915
-rw-r--r--  compiler/linker/arm64/relative_patcher_arm64.cc | 182
-rw-r--r--  compiler/linker/arm64/relative_patcher_arm64.h | 39
-rw-r--r--  compiler/linker/arm64/relative_patcher_arm64_test.cc | 227
-rw-r--r--  compiler/linker/mips/relative_patcher_mips32r6_test.cc | 19
-rw-r--r--  compiler/linker/mips/relative_patcher_mips_test.cc | 27
-rw-r--r--  compiler/linker/mips64/relative_patcher_mips64_test.cc | 19
-rw-r--r--  compiler/linker/multi_oat_relative_patcher.h | 2
-rw-r--r--  compiler/linker/multi_oat_relative_patcher_test.cc | 2
-rw-r--r--  compiler/linker/relative_patcher_test.h | 10
-rw-r--r--  compiler/linker/x86/relative_patcher_x86_test.cc | 13
-rw-r--r--  compiler/linker/x86_64/relative_patcher_x86_64_test.cc | 13
-rw-r--r--  compiler/oat_writer.cc | 79
-rw-r--r--  compiler/oat_writer.h | 2
-rw-r--r--  compiler/optimizing/block_builder.cc | 20
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 4
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 2
-rw-r--r--  compiler/optimizing/code_generator.cc | 25
-rw-r--r--  compiler/optimizing/code_generator.h | 10
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 1327
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 79
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 428
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 71
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 1422
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 88
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 761
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 33
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 309
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 41
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc | 173
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc | 642
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc | 149
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc | 153
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 161
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 13
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 106
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 19
-rw-r--r--  compiler/optimizing/code_sinking.cc | 19
-rw-r--r--  compiler/optimizing/codegen_test.cc | 30
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 2
-rw-r--r--  compiler/optimizing/graph_checker.cc | 22
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 8
-rw-r--r--  compiler/optimizing/gvn.cc | 4
-rw-r--r--  compiler/optimizing/inliner.cc | 21
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 132
-rw-r--r--  compiler/optimizing/instruction_builder.h | 24
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 21
-rw-r--r--  compiler/optimizing/instruction_simplifier.h | 14
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 13
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 58
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h | 1
-rw-r--r--  compiler/optimizing/intrinsics.cc | 4
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 102
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 97
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 108
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 250
-rw-r--r--  compiler/optimizing/intrinsics_mips.h | 1
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 177
-rw-r--r--  compiler/optimizing/intrinsics_mips64.h | 1
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 83
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 87
-rw-r--r--  compiler/optimizing/load_store_analysis.cc | 51
-rw-r--r--  compiler/optimizing/load_store_analysis.h | 518
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc | 187
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 507
-rw-r--r--  compiler/optimizing/load_store_elimination.h | 9
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 312
-rw-r--r--  compiler/optimizing/loop_optimization.h | 8
-rw-r--r--  compiler/optimizing/nodes.cc | 128
-rw-r--r--  compiler/optimizing/nodes.h | 249
-rw-r--r--  compiler/optimizing/nodes_shared.h | 43
-rw-r--r--  compiler/optimizing/nodes_vector.h | 29
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 35
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.cc | 15
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 4
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 39
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 1
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 5
-rw-r--r--  compiler/optimizing/scheduler.cc | 40
-rw-r--r--  compiler/optimizing/scheduler.h | 5
-rw-r--r--  compiler/optimizing/scheduler_arm.cc | 827
-rw-r--r--  compiler/optimizing/scheduler_arm.h | 163
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 151
-rw-r--r--  compiler/optimizing/scheduler_arm64.h | 79
-rw-r--r--  compiler/optimizing/scheduler_test.cc | 260
-rw-r--r--  compiler/optimizing/sharpening.cc | 57
-rw-r--r--  compiler/optimizing/sharpening.h | 4
-rw-r--r--  compiler/utils/arm/assembler_arm.h | 3
-rw-r--r--  compiler/utils/arm/assembler_thumb2.cc | 153
-rw-r--r--  compiler/utils/arm/assembler_thumb2.h | 41
-rw-r--r--  compiler/utils/assembler_thumb_test_expected.cc.inc | 4
-rw-r--r--  compiler/utils/dedupe_set_test.cc | 2
-rw-r--r--  compiler/utils/label.h | 12
-rw-r--r--  compiler/utils/managed_register.h | 12
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc | 219
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h | 76
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc | 354
-rw-r--r--  compiler/utils/swap_space.cc | 2
-rw-r--r--  compiler/utils/swap_space.h | 2
-rw-r--r--  compiler/utils/type_reference.h | 51
-rw-r--r--  compiler/utils/x86/assembler_x86.cc | 133
-rw-r--r--  compiler/utils/x86/assembler_x86.h | 19
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc | 64
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc | 150
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h | 19
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc | 64
-rw-r--r--  compiler/verifier_deps_test.cc | 2
142 files changed, 11959 insertions, 4225 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 6ef866a3c6..307a42cbba 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -67,6 +67,7 @@ art_cc_defaults {
"optimizing/intrinsics.cc",
"optimizing/licm.cc",
"optimizing/linear_order.cc",
+ "optimizing/load_store_analysis.cc",
"optimizing/load_store_elimination.cc",
"optimizing/locations.cc",
"optimizing/loop_optimization.cc",
@@ -115,6 +116,7 @@ art_cc_defaults {
"optimizing/intrinsics_arm.cc",
"optimizing/intrinsics_arm_vixl.cc",
"optimizing/nodes_shared.cc",
+ "optimizing/scheduler_arm.cc",
"utils/arm/assembler_arm.cc",
"utils/arm/assembler_arm_vixl.cc",
"utils/arm/assembler_thumb2.cc",
@@ -338,6 +340,7 @@ art_cc_test {
"elf_writer_test.cc",
"exception_test.cc",
"image_test.cc",
+ "image_write_read_test.cc",
"jni/jni_compiler_test.cc",
"linker/multi_oat_relative_patcher_test.cc",
"linker/output_stream_test.cc",
@@ -372,6 +375,7 @@ art_cc_test {
"jni/jni_cfi_test.cc",
"optimizing/codegen_test.cc",
+ "optimizing/load_store_analysis_test.cc",
"optimizing/optimizing_cfi_test.cc",
"optimizing/scheduler_test.cc",
],
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 39edd1eb02..a1ee68faeb 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -33,7 +33,7 @@
#include "mirror/object-inl.h"
#include "oat_quick_method_header.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils.h"
namespace art {
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index d0f66e2d8e..0ca23a5c50 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -119,24 +119,24 @@ class LinkerPatch {
// choose to squeeze the Type into fewer than 8 bits, we'll have to declare
// patch_type_ as an uintN_t and do explicit static_cast<>s.
enum class Type : uint8_t {
- kMethod,
+ kMethodRelative, // NOTE: Actual patching is instruction_set-dependent.
kCall,
kCallRelative, // NOTE: Actual patching is instruction_set-dependent.
- kType,
kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
- kString,
kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent.
kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent.
};
- static LinkerPatch MethodPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t target_method_idx) {
- LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file);
+ static LinkerPatch RelativeMethodPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_method_idx) {
+ LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file);
patch.method_idx_ = target_method_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
return patch;
}
@@ -156,14 +156,6 @@ class LinkerPatch {
return patch;
}
- static LinkerPatch TypePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t target_type_idx) {
- LinkerPatch patch(literal_offset, Type::kType, target_dex_file);
- patch.type_idx_ = target_type_idx;
- return patch;
- }
-
static LinkerPatch RelativeTypePatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
@@ -184,14 +176,6 @@ class LinkerPatch {
return patch;
}
- static LinkerPatch StringPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t target_string_idx) {
- LinkerPatch patch(literal_offset, Type::kString, target_dex_file);
- patch.string_idx_ = target_string_idx;
- return patch;
- }
-
static LinkerPatch RelativeStringPatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
@@ -244,6 +228,7 @@ class LinkerPatch {
bool IsPcRelative() const {
switch (GetType()) {
+ case Type::kMethodRelative:
case Type::kCallRelative:
case Type::kTypeRelative:
case Type::kTypeBssEntry:
@@ -258,36 +243,32 @@ class LinkerPatch {
}
MethodReference TargetMethod() const {
- DCHECK(patch_type_ == Type::kMethod ||
+ DCHECK(patch_type_ == Type::kMethodRelative ||
patch_type_ == Type::kCall ||
patch_type_ == Type::kCallRelative);
return MethodReference(target_dex_file_, method_idx_);
}
const DexFile* TargetTypeDexFile() const {
- DCHECK(patch_type_ == Type::kType ||
- patch_type_ == Type::kTypeRelative ||
+ DCHECK(patch_type_ == Type::kTypeRelative ||
patch_type_ == Type::kTypeBssEntry);
return target_dex_file_;
}
dex::TypeIndex TargetTypeIndex() const {
- DCHECK(patch_type_ == Type::kType ||
- patch_type_ == Type::kTypeRelative ||
+ DCHECK(patch_type_ == Type::kTypeRelative ||
patch_type_ == Type::kTypeBssEntry);
return dex::TypeIndex(type_idx_);
}
const DexFile* TargetStringDexFile() const {
- DCHECK(patch_type_ == Type::kString ||
- patch_type_ == Type::kStringRelative ||
+ DCHECK(patch_type_ == Type::kStringRelative ||
patch_type_ == Type::kStringBssEntry);
return target_dex_file_;
}
dex::StringIndex TargetStringIndex() const {
- DCHECK(patch_type_ == Type::kString ||
- patch_type_ == Type::kStringRelative ||
+ DCHECK(patch_type_ == Type::kStringRelative ||
patch_type_ == Type::kStringBssEntry);
return dex::StringIndex(string_idx_);
}
@@ -303,7 +284,8 @@ class LinkerPatch {
}
uint32_t PcInsnOffset() const {
- DCHECK(patch_type_ == Type::kTypeRelative ||
+ DCHECK(patch_type_ == Type::kMethodRelative ||
+ patch_type_ == Type::kTypeRelative ||
patch_type_ == Type::kTypeBssEntry ||
patch_type_ == Type::kStringRelative ||
patch_type_ == Type::kStringBssEntry ||
diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc
index 99ee875da2..72b2282ade 100644
--- a/compiler/compiled_method_test.cc
+++ b/compiler/compiled_method_test.cc
@@ -50,10 +50,14 @@ TEST(CompiledMethod, LinkerPatchOperators) {
const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1);
const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2);
LinkerPatch patches[] = {
- LinkerPatch::MethodPatch(16u, dex_file1, 1000u),
- LinkerPatch::MethodPatch(16u, dex_file1, 1001u),
- LinkerPatch::MethodPatch(16u, dex_file2, 1000u),
- LinkerPatch::MethodPatch(16u, dex_file2, 1001u), // Index 3.
+ LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1000u),
+ LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1000u),
+ LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1001u),
+ LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Index 3.
+ LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1000u),
+ LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u),
+ LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u),
+ LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1001u),
LinkerPatch::CodePatch(16u, dex_file1, 1000u),
LinkerPatch::CodePatch(16u, dex_file1, 1001u),
LinkerPatch::CodePatch(16u, dex_file2, 1000u),
@@ -62,10 +66,38 @@ TEST(CompiledMethod, LinkerPatchOperators) {
LinkerPatch::RelativeCodePatch(16u, dex_file1, 1001u),
LinkerPatch::RelativeCodePatch(16u, dex_file2, 1000u),
LinkerPatch::RelativeCodePatch(16u, dex_file2, 1001u),
- LinkerPatch::TypePatch(16u, dex_file1, 1000u),
- LinkerPatch::TypePatch(16u, dex_file1, 1001u),
- LinkerPatch::TypePatch(16u, dex_file2, 1000u),
- LinkerPatch::TypePatch(16u, dex_file2, 1001u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file1, 3000u, 1000u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file1, 3001u, 1000u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file1, 3000u, 1001u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file1, 3001u, 1001u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file2, 3000u, 1000u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file2, 3001u, 1000u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file2, 3000u, 1001u),
+ LinkerPatch::RelativeTypePatch(16u, dex_file2, 3001u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3000u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3001u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3000u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3001u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3000u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3001u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3000u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3001u, 1001u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file1, 3000u, 1000u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file1, 3001u, 1000u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file1, 3000u, 1001u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file1, 3001u, 1001u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file2, 3000u, 1000u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file2, 3001u, 1000u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file2, 3000u, 1001u),
+ LinkerPatch::RelativeStringPatch(16u, dex_file2, 3001u, 1001u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3000u, 1000u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3001u, 1000u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3000u, 1001u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3001u, 1001u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1000u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1000u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1001u),
+ LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1001u),
LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u),
LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u),
LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u),
@@ -74,10 +106,19 @@ TEST(CompiledMethod, LinkerPatchOperators) {
LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u),
LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u),
LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u),
- LinkerPatch::MethodPatch(32u, dex_file1, 1000u),
- LinkerPatch::MethodPatch(32u, dex_file1, 1001u),
- LinkerPatch::MethodPatch(32u, dex_file2, 1000u),
- LinkerPatch::MethodPatch(32u, dex_file2, 1001u),
+ LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 0u),
+ LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 1u),
+ LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u),
+ LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u),
+
+ LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1000u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1000u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1001u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1001u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1000u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1000u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1001u),
+ LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1001u),
LinkerPatch::CodePatch(32u, dex_file1, 1000u),
LinkerPatch::CodePatch(32u, dex_file1, 1001u),
LinkerPatch::CodePatch(32u, dex_file2, 1000u),
@@ -86,10 +127,38 @@ TEST(CompiledMethod, LinkerPatchOperators) {
LinkerPatch::RelativeCodePatch(32u, dex_file1, 1001u),
LinkerPatch::RelativeCodePatch(32u, dex_file2, 1000u),
LinkerPatch::RelativeCodePatch(32u, dex_file2, 1001u),
- LinkerPatch::TypePatch(32u, dex_file1, 1000u),
- LinkerPatch::TypePatch(32u, dex_file1, 1001u),
- LinkerPatch::TypePatch(32u, dex_file2, 1000u),
- LinkerPatch::TypePatch(32u, dex_file2, 1001u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file1, 3000u, 1000u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file1, 3001u, 1000u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file1, 3000u, 1001u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file1, 3001u, 1001u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file2, 3000u, 1000u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file2, 3001u, 1000u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file2, 3000u, 1001u),
+ LinkerPatch::RelativeTypePatch(32u, dex_file2, 3001u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3000u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3001u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3000u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3001u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3000u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3001u, 1000u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3000u, 1001u),
+ LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3001u, 1001u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file1, 3000u, 1000u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file1, 3001u, 1000u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file1, 3000u, 1001u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file1, 3001u, 1001u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file2, 3000u, 1000u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file2, 3001u, 1000u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file2, 3000u, 1001u),
+ LinkerPatch::RelativeStringPatch(32u, dex_file2, 3001u, 1001u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3000u, 1000u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3001u, 1000u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3000u, 1001u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3001u, 1001u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1000u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1000u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1001u),
+ LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1001u),
LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u),
LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u),
LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u),
@@ -98,7 +167,12 @@ TEST(CompiledMethod, LinkerPatchOperators) {
LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u),
LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u),
LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u),
- LinkerPatch::MethodPatch(16u, dex_file2, 1001u), // identical with patch as index 3.
+ LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 0u),
+ LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 1u),
+ LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u),
+ LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u),
+
+ LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Same as patch at index 3.
};
constexpr size_t last_index = arraysize(patches) - 1u;
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 908d3669ed..cd4c59101e 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -25,11 +25,11 @@ namespace art {
namespace jit {
class JitCodeCache;
-}
+} // namespace jit
namespace mirror {
class ClassLoader;
class DexCache;
-}
+} // namespace mirror
class ArtMethod;
class CompilerDriver;
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 558c7d5754..de32351abf 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -411,7 +411,7 @@ class ElfCompilationUnitWriter {
for (const auto& base_class_reference : base_class_references) {
size_t reference_offset = base_class_reference.first;
mirror::Class* base_class = base_class_reference.second;
- const auto& it = class_declarations.find(base_class);
+ const auto it = class_declarations.find(base_class);
if (it != class_declarations.end()) {
info_.UpdateUint32(reference_offset, it->second);
} else {
@@ -512,7 +512,7 @@ class ElfCompilationUnitWriter {
using namespace dwarf; // NOLINT. For easy access to DWARF constants.
DCHECK(!desc.empty());
- const auto& it = type_cache_.find(desc);
+ const auto it = type_cache_.find(desc);
if (it != type_cache_.end()) {
return it->second;
}
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index cbfdbddd1d..bf47e8f3d9 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -85,7 +85,7 @@ struct VariableLocation {
// The result will cover all ranges where the variable is in scope.
// PCs corresponding to stackmap with dex register map are accurate,
// all other PCs are best-effort only.
-std::vector<VariableLocation> GetVariableLocations(
+static std::vector<VariableLocation> GetVariableLocations(
const MethodDebugInfo* method_info,
const std::vector<DexRegisterMap>& dex_register_maps,
uint16_t vreg,
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index d1c10a9246..7fa6e146c5 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -30,6 +30,7 @@
#include "debug/method_debug_info.h"
#include "elf_builder.h"
#include "linker/vector_output_stream.h"
+#include "oat.h"
namespace art {
namespace debug {
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 07f7229827..5d688108e7 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -29,7 +29,7 @@ namespace art {
class OatHeader;
namespace mirror {
class Class;
-}
+} // namespace mirror
namespace debug {
struct MethodDebugInfo;
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 1573062033..2db99cda3e 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -28,7 +28,7 @@
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "mirror/dex_cache.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
namespace art {
namespace optimizer {
diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc
index e691a67dc0..257229101c 100644
--- a/compiler/dex/inline_method_analyser.cc
+++ b/compiler/dex/inline_method_analyser.cc
@@ -433,8 +433,11 @@ bool InlineMethodAnalyser::AnalyseMethodCode(ArtMethod* method, InlineMethod* re
// Native or abstract.
return false;
}
- return AnalyseMethodCode(
- code_item, method->ToMethodReference(), method->IsStatic(), method, result);
+ return AnalyseMethodCode(code_item,
+ MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
+ method->IsStatic(),
+ method,
+ result);
}
bool InlineMethodAnalyser::AnalyseMethodCode(const DexFile::CodeItem* code_item,
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 3f0df3b2c8..b87cb61ed6 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -17,12 +17,13 @@
#include "verification_results.h"
#include "base/logging.h"
-#include "base/stl_util.h"
#include "base/mutex-inl.h"
+#include "base/stl_util.h"
#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
+#include "runtime.h"
#include "thread.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils/atomic_method_ref_map-inl.h"
#include "verified_method.h"
#include "verifier/method_verifier-inl.h"
@@ -82,7 +83,12 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method
// TODO: Investigate why are we doing the work again for this method and try to avoid it.
LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod();
if (!Runtime::Current()->UseJitCompilation()) {
- DCHECK_EQ(existing->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
+ if (kIsDebugBuild) {
+ auto ex_set = existing->GetSafeCastSet();
+ auto ve_set = verified_method->GetSafeCastSet();
+ CHECK_EQ(ex_set == nullptr, ve_set == nullptr);
+ CHECK((ex_set == nullptr) || (ex_set->size() == ve_set->size()));
+ }
}
// Let the unique_ptr delete the new verified method since there was already an existing one
// registered. It is unsafe to replace the existing one since the JIT may be using it to
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 608a18aa66..e46dc597fa 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -49,7 +49,10 @@ const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_ve
}
bool VerifiedMethod::IsSafeCast(uint32_t pc) const {
- return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
+ if (safe_cast_set_ == nullptr) {
+ return false;
+ }
+ return std::binary_search(safe_cast_set_->begin(), safe_cast_set_->end(), pc);
}
void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) {
@@ -94,12 +97,16 @@ void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifi
/* strict */ true,
/* assignable */ true);
}
+ if (safe_cast_set_ == nullptr) {
+ safe_cast_set_.reset(new SafeCastSet());
+ }
// Verify ordering for push_back() to the sorted vector.
- DCHECK(safe_cast_set_.empty() || safe_cast_set_.back() < dex_pc);
- safe_cast_set_.push_back(dex_pc);
+ DCHECK(safe_cast_set_->empty() || safe_cast_set_->back() < dex_pc);
+ safe_cast_set_->push_back(dex_pc);
}
}
}
+ DCHECK(safe_cast_set_ == nullptr || !safe_cast_set_->empty());
}
} // namespace art
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 439e69ece9..64b3f448e6 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -43,8 +43,8 @@ class VerifiedMethod {
REQUIRES_SHARED(Locks::mutator_lock_);
~VerifiedMethod() = default;
- const SafeCastSet& GetSafeCastSet() const {
- return safe_cast_set_;
+ const SafeCastSet* GetSafeCastSet() const {
+ return safe_cast_set_.get();
}
// Returns true if the cast can statically be verified to be redundant
@@ -69,7 +69,7 @@ class VerifiedMethod {
void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
REQUIRES_SHARED(Locks::mutator_lock_);
- SafeCastSet safe_cast_set_;
+ std::unique_ptr<SafeCastSet> safe_cast_set_;
const uint32_t encountered_error_types_;
const bool has_runtime_throw_;
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index e6a47ba60f..528b0a215b 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -21,7 +21,7 @@
#include "base/logging.h"
#include "compiled_method.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils.h"
#include "utils/dedupe_set-inl.h"
#include "utils/swap_space.h"
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 6572d170e6..bbd28b2576 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -71,11 +71,11 @@ TEST(CompiledMethodStorage, Deduplicate) {
};
const LinkerPatch raw_patches1[] = {
LinkerPatch::CodePatch(0u, nullptr, 1u),
- LinkerPatch::MethodPatch(4u, nullptr, 1u),
+ LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u),
};
const LinkerPatch raw_patches2[] = {
LinkerPatch::CodePatch(0u, nullptr, 1u),
- LinkerPatch::MethodPatch(4u, nullptr, 2u),
+ LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u),
};
ArrayRef<const LinkerPatch> patches[] = {
ArrayRef<const LinkerPatch>(raw_patches1),
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 582330611d..8cc1cc38e2 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -24,10 +24,11 @@
#include "base/enums.h"
#include "class_linker-inl.h"
#include "dex_compilation_unit.h"
+#include "handle_scope-inl.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache-inl.h"
+#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
-#include "handle_scope-inl.h"
namespace art {
@@ -149,6 +150,11 @@ inline ArtMethod* CompilerDriver::ResolveMethod(
return resolved_method;
}
+inline VerificationResults* CompilerDriver::GetVerificationResults() const {
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ return verification_results_;
+}
+
} // namespace art
#endif // ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a8ab7c6091..93f678c64a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -28,6 +28,7 @@
#include "art_field-inl.h"
#include "art_method-inl.h"
+#include "base/arena_allocator.h"
#include "base/array_ref.h"
#include "base/bit_vector.h"
#include "base/enums.h"
@@ -36,7 +37,6 @@
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "class_linker-inl.h"
-#include "compiled_class.h"
#include "compiled_method.h"
#include "compiler.h"
#include "compiler_callbacks.h"
@@ -317,11 +317,6 @@ CompilerDriver::CompilerDriver(
}
CompilerDriver::~CompilerDriver() {
- Thread* self = Thread::Current();
- {
- MutexLock mu(self, compiled_classes_lock_);
- STLDeleteValues(&compiled_classes_);
- }
compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED,
CompiledMethod* method) {
if (method != nullptr) {
@@ -1005,7 +1000,8 @@ bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_r
if (profile_compilation_info_ == nullptr) {
return false;
}
- bool result = profile_compilation_info_->ContainsMethod(method_ref);
+ // TODO: Revisit compiling all startup methods. b/36457259
+ bool result = profile_compilation_info_->IsStartupOrHotMethod(method_ref);
if (kDebugProfileGuidedCompilation) {
LOG(INFO) << "[ProfileGuidedCompilation] "
@@ -1978,8 +1974,7 @@ bool CompilerDriver::FastVerify(jobject jclass_loader,
if (compiler_only_verifies) {
// Just update the compiled_classes_ map. The compiler doesn't need to resolve
// the type.
- compiled_classes_.Overwrite(
- ClassReference(dex_file, i), new CompiledClass(mirror::Class::kStatusVerified));
+ compiled_classes_.Overwrite(ClassReference(dex_file, i), mirror::Class::kStatusVerified);
} else {
// Update the class status, so later compilation stages know they don't need to verify
// the class.
@@ -2245,7 +2240,7 @@ class InitializeClassVisitor : public CompilationVisitor {
public:
explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
- void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+ void Visit(size_t class_def_index) OVERRIDE {
ATRACE_CALL();
jobject jclass_loader = manager_->GetClassLoader();
const DexFile& dex_file = *manager_->GetDexFile();
@@ -2260,89 +2255,132 @@ class InitializeClassVisitor : public CompilationVisitor {
Handle<mirror::Class> klass(
hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader)));
- if (klass != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) {
- // Only try to initialize classes that were successfully verified.
- if (klass->IsVerified()) {
- // Attempt to initialize the class but bail if we either need to initialize the super-class
- // or static fields.
- manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
- if (!klass->IsInitialized()) {
- // We don't want non-trivial class initialization occurring on multiple threads due to
- // deadlock problems. For example, a parent class is initialized (holding its lock) that
- // refers to a sub-class in its static/class initializer causing it to try to acquire the
- // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
- // after first initializing its parents, whose locks are acquired. This leads to a
- // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
- // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
- // than use a special Object for the purpose we use the Class of java.lang.Class.
- Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
- ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
- // Attempt to initialize allowing initialization of parent classes but still not static
- // fields.
+ if (klass != nullptr && !SkipClass(manager_->GetClassLoader(), dex_file, klass.Get())) {
+ TryInitializeClass(klass, class_loader);
+ }
+ // Clear any class not found or verification exceptions.
+ soa.Self()->ClearException();
+ }
+
+ // A helper function for initializing klass.
+ void TryInitializeClass(Handle<mirror::Class> klass, Handle<mirror::ClassLoader>& class_loader)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ const DexFile& dex_file = klass->GetDexFile();
+ const DexFile::ClassDef* class_def = klass->GetClassDef();
+ const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def->class_idx_);
+ const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
+ ScopedObjectAccessUnchecked soa(Thread::Current());
+ StackHandleScope<3> hs(soa.Self());
+
+ mirror::Class::Status old_status = klass->GetStatus();;
+ // Only try to initialize classes that were successfully verified.
+ if (klass->IsVerified()) {
+ // Attempt to initialize the class but bail if we either need to initialize the super-class
+ // or static fields.
+ manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
+ old_status = klass->GetStatus();
+ if (!klass->IsInitialized()) {
+ // We don't want non-trivial class initialization occurring on multiple threads due to
+ // deadlock problems. For example, a parent class is initialized (holding its lock) that
+ // refers to a sub-class in its static/class initializer causing it to try to acquire the
+ // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
+ // after first initializing its parents, whose locks are acquired. This leads to a
+ // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
+ // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
+ // than use a special Object for the purpose we use the Class of java.lang.Class.
+ Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
+ ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
+ // Attempt to initialize allowing initialization of parent classes but still not static
+ // fields.
+ bool is_superclass_initialized = true;
+ if (!manager_->GetCompiler()->GetCompilerOptions().IsAppImage()) {
+ // If this is not an app image, the compiler won't initialize too many things and will
+ // fail fast, so don't check dependencies.
manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true);
- if (!klass->IsInitialized()) {
+ } else {
+ // For app images, do the initialization recursively and resolve types encountered to make
+ // sure the compiler runs without error.
+ is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self());
+ if (is_superclass_initialized) {
+ manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true);
+ }
+ }
+ old_status = klass->GetStatus();
+ // If superclass cannot be initialized, no need to proceed.
+ if (!klass->IsInitialized() &&
+ is_superclass_initialized &&
+ manager_->GetCompiler()->IsImageClass(descriptor)) {
+ bool can_init_static_fields = false;
+ if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) {
// We need to initialize static fields, we only do this for image classes that aren't
// marked with the $NoPreloadHolder (which implies this should not be initialized early).
- bool can_init_static_fields =
- manager_->GetCompiler()->GetCompilerOptions().IsBootImage() &&
- manager_->GetCompiler()->IsImageClass(descriptor) &&
- !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
- if (can_init_static_fields) {
- VLOG(compiler) << "Initializing: " << descriptor;
- // TODO multithreading support. We should ensure the current compilation thread has
- // exclusive access to the runtime and the transaction. To achieve this, we could use
- // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
- // checks in Thread::AssertThreadSuspensionIsAllowable.
- Runtime* const runtime = Runtime::Current();
- Transaction transaction;
-
- // Run the class initializer in transaction mode.
- runtime->EnterTransactionMode(&transaction);
- const mirror::Class::Status old_status = klass->GetStatus();
- bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
- true);
- // TODO we detach transaction from runtime to indicate we quit the transactional
- // mode which prevents the GC from visiting objects modified during the transaction.
- // Ensure GC is not run so don't access freed objects when aborting transaction.
-
- {
- ScopedAssertNoThreadSuspension ants("Transaction end");
- runtime->ExitTransactionMode();
-
- if (!success) {
- CHECK(soa.Self()->IsExceptionPending());
- mirror::Throwable* exception = soa.Self()->GetException();
- VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
- << exception->Dump();
- std::ostream* file_log = manager_->GetCompiler()->
- GetCompilerOptions().GetInitFailureOutput();
- if (file_log != nullptr) {
- *file_log << descriptor << "\n";
- *file_log << exception->Dump() << "\n";
- }
- soa.Self()->ClearException();
- transaction.Rollback();
- CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+ can_init_static_fields = !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
+ } else {
+ can_init_static_fields = manager_->GetCompiler()->GetCompilerOptions().IsAppImage() &&
+ !soa.Self()->IsExceptionPending() &&
+ NoClinitInDependency(klass, soa.Self(), &class_loader);
+ // TODO The checking for clinit can be removed since it's already
+ // checked when init superclass. Currently keep it because it contains
+ // processing of intern strings. Will be removed later when intern strings
+ // and clinit are both initialized.
+ }
+
+ if (can_init_static_fields) {
+ VLOG(compiler) << "Initializing: " << descriptor;
+ // TODO multithreading support. We should ensure the current compilation thread has
+ // exclusive access to the runtime and the transaction. To achieve this, we could use
+ // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
+ // checks in Thread::AssertThreadSuspensionIsAllowable.
+ Runtime* const runtime = Runtime::Current();
+ Transaction transaction;
+
+ // Run the class initializer in transaction mode.
+ runtime->EnterTransactionMode(&transaction);
+ bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
+ true);
+ // TODO we detach transaction from runtime to indicate we quit the transactional
+ // mode which prevents the GC from visiting objects modified during the transaction.
+ // Ensure GC is not run so don't access freed objects when aborting transaction.
+
+ {
+ ScopedAssertNoThreadSuspension ants("Transaction end");
+ runtime->ExitTransactionMode();
+
+ if (!success) {
+ CHECK(soa.Self()->IsExceptionPending());
+ mirror::Throwable* exception = soa.Self()->GetException();
+ VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+ << exception->Dump();
+ std::ostream* file_log = manager_->GetCompiler()->
+ GetCompilerOptions().GetInitFailureOutput();
+ if (file_log != nullptr) {
+ *file_log << descriptor << "\n";
+ *file_log << exception->Dump() << "\n";
}
+ soa.Self()->ClearException();
+ transaction.Rollback();
+ CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
}
+ }
- if (!success) {
- // On failure, still intern strings of static fields and seen in <clinit>, as these
- // will be created in the zygote. This is separated from the transaction code just
- // above as we will allocate strings, so must be allowed to suspend.
+ if (!success) {
+ // On failure, still intern strings of static fields and seen in <clinit>, as these
+ // will be created in the zygote. This is separated from the transaction code just
+ // above as we will allocate strings, so must be allowed to suspend.
+ if (&klass->GetDexFile() == manager_->GetDexFile()) {
InternStrings(klass, class_loader);
}
}
}
- soa.Self()->AssertNoPendingException();
}
+ soa.Self()->AssertNoPendingException();
}
- // Record the final class status if necessary.
- ClassReference ref(manager_->GetDexFile(), class_def_index);
- manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus());
}
- // Clear any class not found or verification exceptions.
- soa.Self()->ClearException();
+ // Record the final class status if necessary.
+ ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
+ // Back up the status before doing initialization for static encoded fields,
+ // because the static encoded branch wants to keep the status as uninitialized.
+ manager_->GetCompiler()->RecordClassStatus(ref, old_status);
}
private:
@@ -2397,6 +2435,136 @@ class InitializeClassVisitor : public CompilationVisitor {
}
}
+ bool ResolveTypesOfMethods(Thread* self, ArtMethod* m)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ auto rtn_type = m->GetReturnType(true);
+ if (rtn_type == nullptr) {
+ self->ClearException();
+ return false;
+ }
+ const DexFile::TypeList* types = m->GetParameterTypeList();
+ if (types != nullptr) {
+ for (uint32_t i = 0; i < types->Size(); ++i) {
+ dex::TypeIndex param_type_idx = types->GetTypeItem(i).type_idx_;
+ auto param_type = m->GetClassFromTypeIndex(param_type_idx, true);
+ if (param_type == nullptr) {
+ self->ClearException();
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ // Pre-resolve types mentioned in all method signatures before starting a transaction,
+ // since ResolveType doesn't work in transaction mode.
+ bool PreResolveTypes(Thread* self, const Handle<mirror::Class>& klass)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ PointerSize pointer_size = manager_->GetClassLinker()->GetImagePointerSize();
+ for (ArtMethod& m : klass->GetMethods(pointer_size)) {
+ if (!ResolveTypesOfMethods(self, &m)) {
+ return false;
+ }
+ }
+ if (klass->IsInterface()) {
+ return true;
+ } else if (klass->HasSuperClass()) {
+ StackHandleScope<1> hs(self);
+ MutableHandle<mirror::Class> super_klass(hs.NewHandle<mirror::Class>(klass->GetSuperClass()));
+ for (int i = super_klass->GetVTableLength() - 1; i >= 0; --i) {
+ ArtMethod* m = klass->GetVTableEntry(i, pointer_size);
+ ArtMethod* super_m = super_klass->GetVTableEntry(i, pointer_size);
+ if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
+ return false;
+ }
+ }
+ for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+ super_klass.Assign(klass->GetIfTable()->GetInterface(i));
+ if (klass->GetClassLoader() != super_klass->GetClassLoader()) {
+ uint32_t num_methods = super_klass->NumVirtualMethods();
+ for (uint32_t j = 0; j < num_methods; ++j) {
+ ArtMethod* m = klass->GetIfTable()->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>(
+ j, pointer_size);
+ ArtMethod* super_m = super_klass->GetVirtualMethod(j, pointer_size);
+ if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ // Initialize the klass's dependencies recursively before initializing itself.
+ // Checking for interfaces is also necessary since interfaces can contain
+ // both default methods and static encoded fields.
+ bool InitializeDependencies(const Handle<mirror::Class>& klass,
+ Handle<mirror::ClassLoader> class_loader,
+ Thread* self)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (klass->HasSuperClass()) {
+ ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+ StackHandleScope<1> hs(self);
+ Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+ if (!handle_scope_super->IsInitialized()) {
+ this->TryInitializeClass(handle_scope_super, class_loader);
+ if (!handle_scope_super->IsInitialized()) {
+ return false;
+ }
+ }
+ }
+
+ uint32_t num_if = klass->NumDirectInterfaces();
+ for (size_t i = 0; i < num_if; i++) {
+ ObjPtr<mirror::Class>
+ interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
+ StackHandleScope<1> hs(self);
+ Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
+
+ TryInitializeClass(handle_interface, class_loader);
+
+ if (!handle_interface->IsInitialized()) {
+ return false;
+ }
+ }
+
+ return PreResolveTypes(self, klass);
+ }
+
+ // In this phase the classes containing class initializers are ignored. Make sure no
+ // clinit appears in klass's super class chain and interfaces.
+ bool NoClinitInDependency(const Handle<mirror::Class>& klass,
+ Thread* self,
+ Handle<mirror::ClassLoader>* class_loader)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ArtMethod* clinit =
+ klass->FindClassInitializer(manager_->GetClassLinker()->GetImagePointerSize());
+ if (clinit != nullptr) {
+ VLOG(compiler) << klass->PrettyClass() << ' ' << clinit->PrettyMethod(true);
+ return false;
+ }
+ if (klass->HasSuperClass()) {
+ ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+ StackHandleScope<1> hs(self);
+ Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+ if (!NoClinitInDependency(handle_scope_super, self, class_loader))
+ return false;
+ }
+
+ uint32_t num_if = klass->NumDirectInterfaces();
+ for (size_t i = 0; i < num_if; i++) {
+ ObjPtr<mirror::Class>
+ interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
+ StackHandleScope<1> hs(self);
+ Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
+ if (!NoClinitInDependency(handle_interface, self, class_loader))
+ return false;
+ }
+
+ return true;
+ }
+
const ParallelCompilationManager* const manager_;
};
@@ -2416,7 +2584,10 @@ void CompilerDriver::InitializeClasses(jobject jni_class_loader,
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
init_thread_pool);
- if (GetCompilerOptions().IsBootImage()) {
+
+ if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) {
+ // Set the thread count to 1 to support initialization for app images, since transactions
+ // don't support multithreading yet.
// TODO: remove this when transactional mode supports multithreading.
init_thread_count = 1U;
}
@@ -2690,14 +2861,15 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
<< method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
}
-CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const {
+bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const {
+ DCHECK(status != nullptr);
MutexLock mu(Thread::Current(), compiled_classes_lock_);
- ClassTable::const_iterator it = compiled_classes_.find(ref);
+ ClassStateTable::const_iterator it = compiled_classes_.find(ref);
if (it == compiled_classes_.end()) {
- return nullptr;
+ return false;
}
- CHECK(it->second != nullptr);
- return it->second;
+ *status = it->second;
+ return true;
}
void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status status) {
@@ -2719,12 +2891,11 @@ void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status
MutexLock mu(Thread::Current(), compiled_classes_lock_);
auto it = compiled_classes_.find(ref);
if (it == compiled_classes_.end()) {
- CompiledClass* compiled_class = new CompiledClass(status);
- compiled_classes_.Overwrite(ref, compiled_class);
- } else if (status > it->second->GetStatus()) {
+ compiled_classes_.Overwrite(ref, status);
+ } else if (status > it->second) {
// Update the status if we now have a greater one. This happens with vdex,
// which records a class is verified, but does not resolve it.
- it->second->SetStatus(status);
+ it->second = status;
}
}
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 874e35716c..38e7d2c686 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -23,7 +23,6 @@
#include <vector>
#include "arch/instruction_set.h"
-#include "base/arena_allocator.h"
#include "base/array_ref.h"
#include "base/bit_utils.h"
#include "base/mutex.h"
@@ -38,7 +37,6 @@
#include "method_reference.h"
#include "mirror/class.h" // For mirror::Class::Status.
#include "os.h"
-#include "runtime.h"
#include "safe_map.h"
#include "thread_pool.h"
#include "utils/atomic_method_ref_map.h"
@@ -56,12 +54,12 @@ class VerifierDepsTest;
} // namespace verifier
class BitVector;
-class CompiledClass;
class CompiledMethod;
class CompilerOptions;
class DexCompilationUnit;
struct InlineIGetIPutData;
class InstructionSetFeatures;
+class InternTable;
class ParallelCompilationManager;
class ScopedObjectAccess;
template <class Allocator> class SrcMap;
@@ -132,10 +130,7 @@ class CompilerDriver {
REQUIRES_SHARED(Locks::mutator_lock_)
REQUIRES(!compiled_classes_lock_, !dex_to_dex_references_lock_);
- VerificationResults* GetVerificationResults() const {
- DCHECK(Runtime::Current()->IsAotCompiler());
- return verification_results_;
- }
+ VerificationResults* GetVerificationResults() const;
InstructionSet GetInstructionSet() const {
return instruction_set_;
@@ -164,7 +159,7 @@ class CompilerDriver {
std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const;
std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const;
- CompiledClass* GetCompiledClass(ClassReference ref) const
+ bool GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const
REQUIRES(!compiled_classes_lock_);
CompiledMethod* GetCompiledMethod(MethodReference ref) const;
@@ -179,6 +174,40 @@ class CompilerDriver {
uint16_t class_def_index,
bool requires)
REQUIRES(!requires_constructor_barrier_lock_);
+
+ // Do the <init> methods for this class require a constructor barrier (prior to the return)?
+ // The answer is "yes", if and only if this class has any instance final fields.
+ // (This must not be called for any non-<init> methods; the answer would be "no").
+ //
+ // ---
+ //
+ // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end
+ // of the invoked constructor. The constructor barrier is a conservative implementation means of
+ // enforcing the freezes happen-before the object being constructed is observable by another
+ // thread.
+ //
+ // Note: This question only makes sense for instance constructors;
+ // static constructors (despite possibly having finals) never need
+ // a barrier.
+ //
+ // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes
+ // class initialization as:
+ //
+ // lock(class.lock)
+ // class.state = initializing
+ // unlock(class.lock)
+ //
+ // invoke <clinit>
+ //
+ // lock(class.lock)
+ // class.state = initialized
+ // unlock(class.lock) <-- acts as a release
+ //
+ // The last operation in the above example acts as an atomic release
+ // for any stores in <clinit>, which ends up being stricter
+ // than what a constructor barrier needs.
+ //
+ // See also QuasiAtomic::ThreadFenceForConstructor().
bool RequiresConstructorBarrier(Thread* self,
const DexFile* dex_file,
uint16_t class_def_index)
@@ -471,10 +500,10 @@ class CompilerDriver {
std::map<ClassReference, bool> requires_constructor_barrier_
GUARDED_BY(requires_constructor_barrier_lock_);
- typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
+ using ClassStateTable = SafeMap<const ClassReference, mirror::Class::Status>;
// All class references that this compiler has compiled.
mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
+ ClassStateTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
typedef AtomicMethodRefMap<CompiledMethod*> MethodTable;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 17854fd61a..4b979d8125 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,7 +23,6 @@
#include "art_method-inl.h"
#include "class_linker-inl.h"
#include "common_compiler_test.h"
-#include "compiled_class.h"
#include "dex_file.h"
#include "dex_file_types.h"
#include "gc/heap.h"
@@ -240,8 +239,14 @@ class CompilerDriverProfileTest : public CompilerDriverTest {
ProfileCompilationInfo info;
for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
- profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
- profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+ profile_info_.AddMethodIndex(dex_file->GetLocation(),
+ dex_file->GetLocationChecksum(),
+ 1,
+ dex_file->NumMethodIds());
+ profile_info_.AddMethodIndex(dex_file->GetLocation(),
+ dex_file->GetLocationChecksum(),
+ 2,
+ dex_file->NumMethodIds());
}
return &profile_info_;
}
@@ -339,10 +344,11 @@ class CompilerDriverVerifyTest : public CompilerDriverTest {
ASSERT_NE(klass, nullptr);
EXPECT_TRUE(klass->IsVerified());
- CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(
- ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_));
- ASSERT_NE(compiled_class, nullptr);
- EXPECT_EQ(compiled_class->GetStatus(), mirror::Class::kStatusVerified);
+ mirror::Class::Status status;
+ bool found = compiler_driver_->GetCompiledClass(
+ ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_), &status);
+ ASSERT_TRUE(found);
+ EXPECT_EQ(status, mirror::Class::kStatusVerified);
}
};
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a0c0a2acf6..a4e2083fe4 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -200,7 +200,7 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
ParseDumpInitFailures(option, Usage);
} else if (option.starts_with("--dump-cfg=")) {
dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
- } else if (option.starts_with("--dump-cfg-append")) {
+ } else if (option == "--dump-cfg-append") {
dump_cfg_append_ = true;
} else if (option.starts_with("--register-allocation-strategy=")) {
ParseRegisterAllocationStrategy(option, Usage);
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 957ea99c49..89c2537476 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -31,7 +31,7 @@ namespace art {
namespace verifier {
class VerifierDepsTest;
-}
+} // namespace verifier
class DexFile;
@@ -237,6 +237,10 @@ class CompilerOptions FINAL {
bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
+ void SetNonPic() {
+ compile_pic_ = false;
+ }
+
const std::string& GetDumpCfgFileName() const {
return dump_cfg_file_name_;
}
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 28c35e96b4..738f5a2b29 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -34,7 +34,7 @@
#include "leb128.h"
#include "linker/buffered_output_stream.h"
#include "linker/file_output_stream.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "thread_pool.h"
#include "utils.h"
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e53d8d2ab..9d7aff769b 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -14,492 +14,17 @@
* limitations under the License.
*/
-#include "image.h"
-
-#include <memory>
-#include <string>
+#include <string.h>
#include <vector>
-#include "android-base/stringprintf.h"
+#include "image_test.h"
-#include "art_method-inl.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker-inl.h"
-#include "compiler_callbacks.h"
-#include "common_compiler_test.h"
-#include "debug/method_debug_info.h"
-#include "dex/quick_compiler_callbacks.h"
-#include "driver/compiler_options.h"
-#include "elf_writer.h"
-#include "elf_writer_quick.h"
-#include "gc/space/image_space.h"
-#include "image_writer.h"
-#include "linker/buffered_output_stream.h"
-#include "linker/file_output_stream.h"
-#include "linker/multi_oat_relative_patcher.h"
-#include "lock_word.h"
-#include "mirror/object-inl.h"
-#include "oat_writer.h"
+#include "image.h"
#include "scoped_thread_state_change-inl.h"
-#include "signal_catcher.h"
-#include "utils.h"
+#include "thread.h"
namespace art {
-static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
-
-struct CompilationHelper {
- std::vector<std::string> dex_file_locations;
- std::vector<ScratchFile> image_locations;
- std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
- std::vector<ScratchFile> image_files;
- std::vector<ScratchFile> oat_files;
- std::vector<ScratchFile> vdex_files;
- std::string image_dir;
-
- void Compile(CompilerDriver* driver,
- ImageHeader::StorageMode storage_mode);
-
- std::vector<size_t> GetImageObjectSectionSizes();
-
- ~CompilationHelper();
-};
-
-class ImageTest : public CommonCompilerTest {
- protected:
- virtual void SetUp() {
- ReserveImageSpace();
- CommonCompilerTest::SetUp();
- }
-
- void TestWriteRead(ImageHeader::StorageMode storage_mode);
-
- void Compile(ImageHeader::StorageMode storage_mode,
- CompilationHelper& out_helper,
- const std::string& extra_dex = "",
- const std::initializer_list<std::string>& image_classes = {});
-
- void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
- CommonCompilerTest::SetUpRuntimeOptions(options);
- callbacks_.reset(new QuickCompilerCallbacks(
- verification_results_.get(),
- CompilerCallbacks::CallbackMode::kCompileBootImage));
- options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
- }
-
- std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
- return new std::unordered_set<std::string>(image_classes_);
- }
-
- ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- PointerSize pointer_size = class_linker_->GetImagePointerSize();
- for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
- if (strcmp(origin->GetName(), m.GetName()) == 0 &&
- origin->GetSignature() == m.GetSignature()) {
- return &m;
- }
- }
- return nullptr;
- }
-
- private:
- std::unordered_set<std::string> image_classes_;
-};
-
-CompilationHelper::~CompilationHelper() {
- for (ScratchFile& image_file : image_files) {
- image_file.Unlink();
- }
- for (ScratchFile& oat_file : oat_files) {
- oat_file.Unlink();
- }
- for (ScratchFile& vdex_file : vdex_files) {
- vdex_file.Unlink();
- }
- const int rmdir_result = rmdir(image_dir.c_str());
- CHECK_EQ(0, rmdir_result);
-}
-
-std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
- std::vector<size_t> ret;
- for (ScratchFile& image_file : image_files) {
- std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
- CHECK(file.get() != nullptr);
- ImageHeader image_header;
- CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
- CHECK(image_header.IsValid());
- ret.push_back(image_header.GetImageSize());
- }
- return ret;
-}
-
-void CompilationHelper::Compile(CompilerDriver* driver,
- ImageHeader::StorageMode storage_mode) {
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
-
- for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
- {
- ScopedObjectAccess soa(Thread::Current());
- // Inject in boot class path so that the compiler driver can see it.
- class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
- }
- class_path.push_back(dex_file.get());
- }
-
- // Enable write for dex2dex.
- for (const DexFile* dex_file : class_path) {
- dex_file_locations.push_back(dex_file->GetLocation());
- if (dex_file->IsReadOnly()) {
- dex_file->EnableWrite();
- }
- }
- {
- // Create a generic tmp file, to be the base of the .art and .oat temporary files.
- ScratchFile location;
- for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
- std::string cur_location =
- android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
- image_locations.push_back(ScratchFile(cur_location));
- }
- }
- std::vector<std::string> image_filenames;
- for (ScratchFile& file : image_locations) {
- std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
- image_filenames.push_back(image_filename);
- size_t pos = image_filename.rfind('/');
- CHECK_NE(pos, std::string::npos) << image_filename;
- if (image_dir.empty()) {
- image_dir = image_filename.substr(0, pos);
- int mkdir_result = mkdir(image_dir.c_str(), 0700);
- CHECK_EQ(0, mkdir_result) << image_dir;
- }
- image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
- }
-
- std::vector<std::string> oat_filenames;
- std::vector<std::string> vdex_filenames;
- for (const std::string& image_filename : image_filenames) {
- std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
- oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
- oat_filenames.push_back(oat_filename);
- std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
- vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
- vdex_filenames.push_back(vdex_filename);
- }
-
- std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
- std::vector<const char*> oat_filename_vector;
- for (const std::string& file : oat_filenames) {
- oat_filename_vector.push_back(file.c_str());
- }
- std::vector<const char*> image_filename_vector;
- for (const std::string& file : image_filenames) {
- image_filename_vector.push_back(file.c_str());
- }
- size_t image_idx = 0;
- for (const DexFile* dex_file : class_path) {
- dex_file_to_oat_index_map.emplace(dex_file, image_idx);
- ++image_idx;
- }
- // TODO: compile_pic should be a test argument.
- std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
- kRequestedImageBase,
- /*compile_pic*/false,
- /*compile_app_image*/false,
- storage_mode,
- oat_filename_vector,
- dex_file_to_oat_index_map));
- {
- {
- jobject class_loader = nullptr;
- TimingLogger timings("ImageTest::WriteRead", false, false);
- TimingLogger::ScopedTiming t("CompileAll", &timings);
- driver->SetDexFilesForOatFile(class_path);
- driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
-
- t.NewTiming("WriteElf");
- SafeMap<std::string, std::string> key_value_store;
- std::vector<const char*> dex_filename_vector;
- for (size_t i = 0; i < class_path.size(); ++i) {
- dex_filename_vector.push_back("");
- }
- key_value_store.Put(OatHeader::kBootClassPathKey,
- gc::space::ImageSpace::GetMultiImageBootClassPath(
- dex_filename_vector,
- oat_filename_vector,
- image_filename_vector));
-
- std::vector<std::unique_ptr<ElfWriter>> elf_writers;
- std::vector<std::unique_ptr<OatWriter>> oat_writers;
- for (ScratchFile& oat_file : oat_files) {
- elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
- driver->GetInstructionSetFeatures(),
- &driver->GetCompilerOptions(),
- oat_file.GetFile()));
- elf_writers.back()->Start();
- oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
- &timings,
- /*profile_compilation_info*/nullptr));
- }
-
- std::vector<OutputStream*> rodata;
- std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
- std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
- // Now that we have finalized key_value_store_, start writing the oat file.
- for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
- const DexFile* dex_file = class_path[i];
- rodata.push_back(elf_writers[i]->StartRoData());
- ArrayRef<const uint8_t> raw_dex_file(
- reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
- dex_file->GetHeader().file_size_);
- oat_writers[i]->AddRawDexFileSource(raw_dex_file,
- dex_file->GetLocation().c_str(),
- dex_file->GetLocationChecksum());
-
- std::unique_ptr<MemMap> cur_opened_dex_files_map;
- std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
- bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
- kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
- rodata.back(),
- driver->GetInstructionSet(),
- driver->GetInstructionSetFeatures(),
- &key_value_store,
- /* verify */ false, // Dex files may be dex-to-dex-ed, don't verify.
- /* update_input_vdex */ false,
- &cur_opened_dex_files_map,
- &cur_opened_dex_files);
- ASSERT_TRUE(dex_files_ok);
-
- if (cur_opened_dex_files_map != nullptr) {
- opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
- for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
- // dex_file_oat_index_map_.emplace(dex_file.get(), i);
- opened_dex_files.push_back(std::move(cur_dex_file));
- }
- } else {
- ASSERT_TRUE(cur_opened_dex_files.empty());
- }
- }
- bool image_space_ok = writer->PrepareImageAddressSpace();
- ASSERT_TRUE(image_space_ok);
-
- if (kIsVdexEnabled) {
- for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
- std::unique_ptr<BufferedOutputStream> vdex_out(
- MakeUnique<BufferedOutputStream>(
- MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
- oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
- oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
- }
- }
-
- for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
- linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
- driver->GetInstructionSetFeatures());
- OatWriter* const oat_writer = oat_writers[i].get();
- ElfWriter* const elf_writer = elf_writers[i].get();
- std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
- oat_writer->Initialize(driver, writer.get(), cur_dex_files);
- oat_writer->PrepareLayout(&patcher);
- size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
- size_t text_size = oat_writer->GetOatSize() - rodata_size;
- elf_writer->PrepareDynamicSection(rodata_size,
- text_size,
- oat_writer->GetBssSize(),
- oat_writer->GetBssRootsOffset());
-
- writer->UpdateOatFileLayout(i,
- elf_writer->GetLoadedSize(),
- oat_writer->GetOatDataOffset(),
- oat_writer->GetOatSize());
-
- bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
- ASSERT_TRUE(rodata_ok);
- elf_writer->EndRoData(rodata[i]);
-
- OutputStream* text = elf_writer->StartText();
- bool text_ok = oat_writer->WriteCode(text);
- ASSERT_TRUE(text_ok);
- elf_writer->EndText(text);
-
- bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
- ASSERT_TRUE(header_ok);
-
- writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
-
- elf_writer->WriteDynamicSection();
- elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-
- bool success = elf_writer->End();
- ASSERT_TRUE(success);
- }
- }
-
- bool success_image = writer->Write(kInvalidFd,
- image_filename_vector,
- oat_filename_vector);
- ASSERT_TRUE(success_image);
-
- for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
- const char* oat_filename = oat_filenames[i].c_str();
- std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
- ASSERT_TRUE(oat_file != nullptr);
- bool success_fixup = ElfWriter::Fixup(oat_file.get(),
- writer->GetOatDataBegin(i));
- ASSERT_TRUE(success_fixup);
- ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
- << oat_filename;
- }
- }
-}
-
-void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
- CompilationHelper& helper,
- const std::string& extra_dex,
- const std::initializer_list<std::string>& image_classes) {
- for (const std::string& image_class : image_classes) {
- image_classes_.insert(image_class);
- }
- CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
- // Set inline filter values.
- compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
- image_classes_.clear();
- if (!extra_dex.empty()) {
- helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
- }
- helper.Compile(compiler_driver_.get(), storage_mode);
- if (image_classes.begin() != image_classes.end()) {
- // Make sure the class got initialized.
- ScopedObjectAccess soa(Thread::Current());
- ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
- for (const std::string& image_class : image_classes) {
- mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
- EXPECT_TRUE(klass != nullptr);
- EXPECT_TRUE(klass->IsInitialized());
- }
- }
-}
-
-void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
- CompilationHelper helper;
- Compile(storage_mode, /*out*/ helper);
- std::vector<uint64_t> image_file_sizes;
- for (ScratchFile& image_file : helper.image_files) {
- std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
- ASSERT_TRUE(file.get() != nullptr);
- ImageHeader image_header;
- ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
- ASSERT_TRUE(image_header.IsValid());
- const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
- ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
- ASSERT_NE(0U, bitmap_section.Size());
-
- gc::Heap* heap = Runtime::Current()->GetHeap();
- ASSERT_TRUE(heap->HaveContinuousSpaces());
- gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
- ASSERT_FALSE(space->IsImageSpace());
- ASSERT_TRUE(space != nullptr);
- ASSERT_TRUE(space->IsMallocSpace());
- image_file_sizes.push_back(file->GetLength());
- }
-
- ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
- std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
-
- // Need to delete the compiler since it has worker threads which are attached to runtime.
- compiler_driver_.reset();
-
- // Tear down old runtime before making a new one, clearing out misc state.
-
- // Remove the reservation of the memory for use to load the image.
- // Need to do this before we reset the runtime.
- UnreserveImageSpace();
-
- helper.extra_dex_files.clear();
- runtime_.reset();
- java_lang_dex_file_ = nullptr;
-
- MemMap::Init();
-
- RuntimeOptions options;
- std::string image("-Ximage:");
- image.append(helper.image_locations[0].GetFilename());
- options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
- // By default the compiler this creates will not include patch information.
- options.push_back(std::make_pair("-Xnorelocate", nullptr));
-
- if (!Runtime::Create(options, false)) {
- LOG(FATAL) << "Failed to create runtime";
- return;
- }
- runtime_.reset(Runtime::Current());
- // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
- // give it away now and then switch to a more managable ScopedObjectAccess.
- Thread::Current()->TransitionFromRunnableToSuspended(kNative);
- ScopedObjectAccess soa(Thread::Current());
- ASSERT_TRUE(runtime_.get() != nullptr);
- class_linker_ = runtime_->GetClassLinker();
-
- gc::Heap* heap = Runtime::Current()->GetHeap();
- ASSERT_TRUE(heap->HasBootImageSpace());
- ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
-
- // We loaded the runtime with an explicit image, so it must exist.
- ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
- for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
- std::unique_ptr<const DexFile> dex(
- LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
- ASSERT_TRUE(dex != nullptr);
- uint64_t image_file_size = image_file_sizes[i];
- gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
- ASSERT_TRUE(image_space != nullptr);
- if (storage_mode == ImageHeader::kStorageModeUncompressed) {
- // Uncompressed, image should be smaller than file.
- ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
- } else if (image_file_size > 16 * KB) {
- // Compressed, file should be smaller than image. Not really valid for small images.
- ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
- }
-
- image_space->VerifyImageAllocations();
- uint8_t* image_begin = image_space->Begin();
- uint8_t* image_end = image_space->End();
- if (i == 0) {
- // This check is only valid for image 0.
- CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
- }
- for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
- const DexFile::ClassDef& class_def = dex->GetClassDef(j);
- const char* descriptor = dex->GetClassDescriptor(class_def);
- mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
- EXPECT_TRUE(klass != nullptr) << descriptor;
- if (image_classes.find(descriptor) == image_classes.end()) {
- EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
- reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
- } else {
- // Image classes should be located inside the image.
- EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
- EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
- }
- EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
- }
- }
-}
-
-TEST_F(ImageTest, WriteReadUncompressed) {
- TestWriteRead(ImageHeader::kStorageModeUncompressed);
-}
-
-TEST_F(ImageTest, WriteReadLZ4) {
- TestWriteRead(ImageHeader::kStorageModeLZ4);
-}
-
-TEST_F(ImageTest, WriteReadLZ4HC) {
- TestWriteRead(ImageHeader::kStorageModeLZ4HC);
-}
-
TEST_F(ImageTest, TestImageLayout) {
std::vector<size_t> image_sizes;
std::vector<size_t> image_sizes_extra;
diff --git a/compiler/image_test.h b/compiler/image_test.h
new file mode 100644
index 0000000000..2f15ff4815
--- /dev/null
+++ b/compiler/image_test.h
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_IMAGE_TEST_H_
+#define ART_COMPILER_IMAGE_TEST_H_
+
+#include "image.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "android-base/stringprintf.h"
+
+#include "art_method-inl.h"
+#include "base/unix_file/fd_file.h"
+#include "class_linker-inl.h"
+#include "compiler_callbacks.h"
+#include "common_compiler_test.h"
+#include "debug/method_debug_info.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "driver/compiler_options.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
+#include "gc/space/image_space.h"
+#include "image_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
+#include "linker/multi_oat_relative_patcher.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
+#include "oat_writer.h"
+#include "scoped_thread_state_change-inl.h"
+#include "signal_catcher.h"
+#include "utils.h"
+
+namespace art {
+
+static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
+
+struct CompilationHelper {
+ std::vector<std::string> dex_file_locations;
+ std::vector<ScratchFile> image_locations;
+ std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
+ std::vector<ScratchFile> image_files;
+ std::vector<ScratchFile> oat_files;
+ std::vector<ScratchFile> vdex_files;
+ std::string image_dir;
+
+ void Compile(CompilerDriver* driver,
+ ImageHeader::StorageMode storage_mode);
+
+ std::vector<size_t> GetImageObjectSectionSizes();
+
+ ~CompilationHelper();
+};
+
+class ImageTest : public CommonCompilerTest {
+ protected:
+ virtual void SetUp() {
+ ReserveImageSpace();
+ CommonCompilerTest::SetUp();
+ }
+
+ void TestWriteRead(ImageHeader::StorageMode storage_mode);
+
+ void Compile(ImageHeader::StorageMode storage_mode,
+ CompilationHelper& out_helper,
+ const std::string& extra_dex = "",
+ const std::initializer_list<std::string>& image_classes = {});
+
+ void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
+ CommonCompilerTest::SetUpRuntimeOptions(options);
+ callbacks_.reset(new QuickCompilerCallbacks(
+ verification_results_.get(),
+ CompilerCallbacks::CallbackMode::kCompileBootImage));
+ options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+ }
+
+ std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
+ return new std::unordered_set<std::string>(image_classes_);
+ }
+
+ ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ PointerSize pointer_size = class_linker_->GetImagePointerSize();
+ for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+ if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+ origin->GetSignature() == m.GetSignature()) {
+ return &m;
+ }
+ }
+ return nullptr;
+ }
+
+ private:
+ std::unordered_set<std::string> image_classes_;
+};
+
+inline CompilationHelper::~CompilationHelper() {
+ for (ScratchFile& image_file : image_files) {
+ image_file.Unlink();
+ }
+ for (ScratchFile& oat_file : oat_files) {
+ oat_file.Unlink();
+ }
+ for (ScratchFile& vdex_file : vdex_files) {
+ vdex_file.Unlink();
+ }
+ const int rmdir_result = rmdir(image_dir.c_str());
+ CHECK_EQ(0, rmdir_result);
+}
+
+inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
+ std::vector<size_t> ret;
+ for (ScratchFile& image_file : image_files) {
+ std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+ CHECK(file.get() != nullptr);
+ ImageHeader image_header;
+ CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+ CHECK(image_header.IsValid());
+ ret.push_back(image_header.GetImageSize());
+ }
+ return ret;
+}
+
+inline void CompilationHelper::Compile(CompilerDriver* driver,
+ ImageHeader::StorageMode storage_mode) {
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
+
+ for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ // Inject in boot class path so that the compiler driver can see it.
+ class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
+ }
+ class_path.push_back(dex_file.get());
+ }
+
+ // Enable write for dex2dex.
+ for (const DexFile* dex_file : class_path) {
+ dex_file_locations.push_back(dex_file->GetLocation());
+ if (dex_file->IsReadOnly()) {
+ dex_file->EnableWrite();
+ }
+ }
+ {
+ // Create a generic tmp file, to be the base of the .art and .oat temporary files.
+ ScratchFile location;
+ for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
+ std::string cur_location =
+ android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
+ image_locations.push_back(ScratchFile(cur_location));
+ }
+ }
+ std::vector<std::string> image_filenames;
+ for (ScratchFile& file : image_locations) {
+ std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
+ image_filenames.push_back(image_filename);
+ size_t pos = image_filename.rfind('/');
+ CHECK_NE(pos, std::string::npos) << image_filename;
+ if (image_dir.empty()) {
+ image_dir = image_filename.substr(0, pos);
+ int mkdir_result = mkdir(image_dir.c_str(), 0700);
+ CHECK_EQ(0, mkdir_result) << image_dir;
+ }
+ image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
+ }
+
+ std::vector<std::string> oat_filenames;
+ std::vector<std::string> vdex_filenames;
+ for (const std::string& image_filename : image_filenames) {
+ std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
+ oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
+ oat_filenames.push_back(oat_filename);
+ std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
+ vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
+ vdex_filenames.push_back(vdex_filename);
+ }
+
+ std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
+ std::vector<const char*> oat_filename_vector;
+ for (const std::string& file : oat_filenames) {
+ oat_filename_vector.push_back(file.c_str());
+ }
+ std::vector<const char*> image_filename_vector;
+ for (const std::string& file : image_filenames) {
+ image_filename_vector.push_back(file.c_str());
+ }
+ size_t image_idx = 0;
+ for (const DexFile* dex_file : class_path) {
+ dex_file_to_oat_index_map.emplace(dex_file, image_idx);
+ ++image_idx;
+ }
+ // TODO: compile_pic should be a test argument.
+ std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
+ kRequestedImageBase,
+ /*compile_pic*/false,
+ /*compile_app_image*/false,
+ storage_mode,
+ oat_filename_vector,
+ dex_file_to_oat_index_map));
+ {
+ {
+ jobject class_loader = nullptr;
+ TimingLogger timings("ImageTest::WriteRead", false, false);
+ TimingLogger::ScopedTiming t("CompileAll", &timings);
+ driver->SetDexFilesForOatFile(class_path);
+ driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
+
+ t.NewTiming("WriteElf");
+ SafeMap<std::string, std::string> key_value_store;
+ std::vector<const char*> dex_filename_vector;
+ for (size_t i = 0; i < class_path.size(); ++i) {
+ dex_filename_vector.push_back("");
+ }
+ key_value_store.Put(OatHeader::kBootClassPathKey,
+ gc::space::ImageSpace::GetMultiImageBootClassPath(
+ dex_filename_vector,
+ oat_filename_vector,
+ image_filename_vector));
+
+ std::vector<std::unique_ptr<ElfWriter>> elf_writers;
+ std::vector<std::unique_ptr<OatWriter>> oat_writers;
+ for (ScratchFile& oat_file : oat_files) {
+ elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
+ driver->GetInstructionSetFeatures(),
+ &driver->GetCompilerOptions(),
+ oat_file.GetFile()));
+ elf_writers.back()->Start();
+ oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
+ &timings,
+ /*profile_compilation_info*/nullptr));
+ }
+
+ std::vector<OutputStream*> rodata;
+ std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
+ std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+ // Now that we have finalized key_value_store_, start writing the oat file.
+ for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
+ const DexFile* dex_file = class_path[i];
+ rodata.push_back(elf_writers[i]->StartRoData());
+ ArrayRef<const uint8_t> raw_dex_file(
+ reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+ dex_file->GetHeader().file_size_);
+ oat_writers[i]->AddRawDexFileSource(raw_dex_file,
+ dex_file->GetLocation().c_str(),
+ dex_file->GetLocationChecksum());
+
+ std::unique_ptr<MemMap> cur_opened_dex_files_map;
+ std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
+ bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
+ kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
+ rodata.back(),
+ driver->GetInstructionSet(),
+ driver->GetInstructionSetFeatures(),
+ &key_value_store,
+ /* verify */ false, // Dex files may be dex-to-dex-ed, don't verify.
+ /* update_input_vdex */ false,
+ &cur_opened_dex_files_map,
+ &cur_opened_dex_files);
+ ASSERT_TRUE(dex_files_ok);
+
+ if (cur_opened_dex_files_map != nullptr) {
+ opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
+ for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
+ // dex_file_oat_index_map_.emplace(dex_file.get(), i);
+ opened_dex_files.push_back(std::move(cur_dex_file));
+ }
+ } else {
+ ASSERT_TRUE(cur_opened_dex_files.empty());
+ }
+ }
+ bool image_space_ok = writer->PrepareImageAddressSpace();
+ ASSERT_TRUE(image_space_ok);
+
+ if (kIsVdexEnabled) {
+ for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
+ std::unique_ptr<BufferedOutputStream> vdex_out(
+ MakeUnique<BufferedOutputStream>(
+ MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+ oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
+ oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
+ }
+ }
+
+ for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
+ linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
+ driver->GetInstructionSetFeatures());
+ OatWriter* const oat_writer = oat_writers[i].get();
+ ElfWriter* const elf_writer = elf_writers[i].get();
+ std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
+ oat_writer->Initialize(driver, writer.get(), cur_dex_files);
+ oat_writer->PrepareLayout(&patcher);
+ size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+ size_t text_size = oat_writer->GetOatSize() - rodata_size;
+ elf_writer->PrepareDynamicSection(rodata_size,
+ text_size,
+ oat_writer->GetBssSize(),
+ oat_writer->GetBssRootsOffset());
+
+ writer->UpdateOatFileLayout(i,
+ elf_writer->GetLoadedSize(),
+ oat_writer->GetOatDataOffset(),
+ oat_writer->GetOatSize());
+
+ bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
+ ASSERT_TRUE(rodata_ok);
+ elf_writer->EndRoData(rodata[i]);
+
+ OutputStream* text = elf_writer->StartText();
+ bool text_ok = oat_writer->WriteCode(text);
+ ASSERT_TRUE(text_ok);
+ elf_writer->EndText(text);
+
+ bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+ ASSERT_TRUE(header_ok);
+
+ writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
+
+ elf_writer->WriteDynamicSection();
+ elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+
+ bool success = elf_writer->End();
+ ASSERT_TRUE(success);
+ }
+ }
+
+ bool success_image = writer->Write(kInvalidFd,
+ image_filename_vector,
+ oat_filename_vector);
+ ASSERT_TRUE(success_image);
+
+ for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
+ const char* oat_filename = oat_filenames[i].c_str();
+ std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+ ASSERT_TRUE(oat_file != nullptr);
+ bool success_fixup = ElfWriter::Fixup(oat_file.get(),
+ writer->GetOatDataBegin(i));
+ ASSERT_TRUE(success_fixup);
+ ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
+ << oat_filename;
+ }
+ }
+}
+
+inline void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
+ CompilationHelper& helper,
+ const std::string& extra_dex,
+ const std::initializer_list<std::string>& image_classes) {
+ for (const std::string& image_class : image_classes) {
+ image_classes_.insert(image_class);
+ }
+ CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
+ // Set inline filter values.
+ compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
+ image_classes_.clear();
+ if (!extra_dex.empty()) {
+ helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
+ }
+ helper.Compile(compiler_driver_.get(), storage_mode);
+ if (image_classes.begin() != image_classes.end()) {
+ // Make sure the class got initialized.
+ ScopedObjectAccess soa(Thread::Current());
+ ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+ for (const std::string& image_class : image_classes) {
+ mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+ EXPECT_TRUE(klass != nullptr);
+ EXPECT_TRUE(klass->IsInitialized());
+ }
+ }
+}
+
+inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+ CompilationHelper helper;
+ Compile(storage_mode, /*out*/ helper);
+ std::vector<uint64_t> image_file_sizes;
+ for (ScratchFile& image_file : helper.image_files) {
+ std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+ ASSERT_TRUE(file.get() != nullptr);
+ ImageHeader image_header;
+ ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+ ASSERT_TRUE(image_header.IsValid());
+ const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+ ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
+ ASSERT_NE(0U, bitmap_section.Size());
+
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ ASSERT_TRUE(heap->HaveContinuousSpaces());
+ gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
+ ASSERT_FALSE(space->IsImageSpace());
+ ASSERT_TRUE(space != nullptr);
+ ASSERT_TRUE(space->IsMallocSpace());
+ image_file_sizes.push_back(file->GetLength());
+ }
+
+ ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
+ std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
+
+ // Need to delete the compiler since it has worker threads which are attached to runtime.
+ compiler_driver_.reset();
+
+ // Tear down old runtime before making a new one, clearing out misc state.
+
+ // Remove the reservation of the memory for use to load the image.
+ // Need to do this before we reset the runtime.
+ UnreserveImageSpace();
+
+ helper.extra_dex_files.clear();
+ runtime_.reset();
+ java_lang_dex_file_ = nullptr;
+
+ MemMap::Init();
+
+ RuntimeOptions options;
+ std::string image("-Ximage:");
+ image.append(helper.image_locations[0].GetFilename());
+ options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
+ // By default the compiler this creates will not include patch information.
+ options.push_back(std::make_pair("-Xnorelocate", nullptr));
+
+ if (!Runtime::Create(options, false)) {
+ LOG(FATAL) << "Failed to create runtime";
+ return;
+ }
+ runtime_.reset(Runtime::Current());
+ // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+  // give it away now and then switch to a more manageable ScopedObjectAccess.
+ Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+ ScopedObjectAccess soa(Thread::Current());
+ ASSERT_TRUE(runtime_.get() != nullptr);
+ class_linker_ = runtime_->GetClassLinker();
+
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ ASSERT_TRUE(heap->HasBootImageSpace());
+ ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
+
+ // We loaded the runtime with an explicit image, so it must exist.
+ ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
+ for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
+ std::unique_ptr<const DexFile> dex(
+ LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
+ ASSERT_TRUE(dex != nullptr);
+ uint64_t image_file_size = image_file_sizes[i];
+ gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
+ ASSERT_TRUE(image_space != nullptr);
+ if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+ // Uncompressed, image should be smaller than file.
+ ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
+ } else if (image_file_size > 16 * KB) {
+ // Compressed, file should be smaller than image. Not really valid for small images.
+ ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
+ }
+
+ image_space->VerifyImageAllocations();
+ uint8_t* image_begin = image_space->Begin();
+ uint8_t* image_end = image_space->End();
+ if (i == 0) {
+ // This check is only valid for image 0.
+ CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
+ }
+ for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
+ const DexFile::ClassDef& class_def = dex->GetClassDef(j);
+ const char* descriptor = dex->GetClassDescriptor(class_def);
+ mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
+ EXPECT_TRUE(klass != nullptr) << descriptor;
+ if (image_classes.find(descriptor) == image_classes.end()) {
+ EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
+ reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
+ } else {
+ // Image classes should be located inside the image.
+ EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
+ EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
+ }
+ EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
+ }
+ }
+}
+
+
+} // namespace art
+
+#endif // ART_COMPILER_IMAGE_TEST_H_
diff --git a/compiler/compiled_class.h b/compiler/image_write_read_test.cc
index 06ce946942..32c0b06766 100644
--- a/compiler/compiled_class.h
+++ b/compiler/image_write_read_test.cc
@@ -14,27 +14,20 @@
* limitations under the License.
*/
-#ifndef ART_COMPILER_COMPILED_CLASS_H_
-#define ART_COMPILER_COMPILED_CLASS_H_
-
-#include "mirror/class.h"
+#include "image_test.h"
namespace art {
-class CompiledClass {
- public:
- explicit CompiledClass(mirror::Class::Status status) : status_(status) {}
- ~CompiledClass() {}
- mirror::Class::Status GetStatus() const {
- return status_;
- }
- void SetStatus(mirror::Class::Status status) {
- status_ = status;
- }
- private:
- mirror::Class::Status status_;
-};
+TEST_F(ImageTest, WriteReadUncompressed) {
+ TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
-} // namespace art
+TEST_F(ImageTest, WriteReadLZ4) {
+ TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
-#endif // ART_COMPILER_COMPILED_CLASS_H_
+TEST_F(ImageTest, WriteReadLZ4HC) {
+ TestWriteRead(ImageHeader::kStorageModeLZ4HC);
+}
+
+} // namespace art
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 39113c8143..2283b39773 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -33,8 +33,8 @@
#include "base/enums.h"
#include "base/length_prefixed_array.h"
#include "base/macros.h"
+#include "class_table.h"
#include "driver/compiler_driver.h"
-#include "gc/space/space.h"
#include "image.h"
#include "lock_word.h"
#include "mem_map.h"
@@ -47,6 +47,10 @@
namespace art {
namespace gc {
+namespace accounting {
+template <size_t kAlignment> class SpaceBitmap;
+typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap;
+} // namespace accounting
namespace space {
class ImageSpace;
} // namespace space
@@ -57,7 +61,6 @@ class ClassLoader;
} // namespace mirror
class ClassLoaderVisitor;
-class ClassTable;
class ImtConflictTable;
static constexpr int kInvalidFd = -1;
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 63c23cb074..c8a0119667 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -28,6 +28,9 @@
// The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to GVN Integer.valueOf
// (kNoSideEffects), and it is also OK to remove it if it's unused.
+// Note: Thread.interrupted is marked with kAllSideEffects due to the lack of a finer-grained
+// side-effect representation.
+
#define INTRINSICS_LIST(V) \
V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -154,7 +157,8 @@
V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \
V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
- V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;")
+ V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;") \
+ V(ThreadInterrupted, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/Thread;", "interrupted", "()Z")
#endif // ART_COMPILER_INTRINSICS_LIST_H_
#undef ART_COMPILER_INTRINSICS_LIST_H_ // #define is only for lint.
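INTRINSICS_LIST(V) above is an X-macro: every consumer supplies its own V to stamp out one line of code per intrinsic, so adding ThreadInterrupted to the list automatically reaches every expansion site. A trimmed-down sketch of the pattern (the list and consumer macro here are made up for illustration and carry fewer columns than the real list):

    // Hypothetical two-entry list in the same shape as INTRINSICS_LIST(V).
    #define EXAMPLE_INTRINSICS_LIST(V) \
      V(DoubleDoubleToRawLongBits, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
      V(ThreadInterrupted, "Ljava/lang/Thread;", "interrupted", "()Z")

    // One consumer: build an enum with an entry per intrinsic.
    enum class ExampleIntrinsic {
    #define MAKE_ENUM_ENTRY(Name, ClassName, MethodName, Signature) k##Name,
      EXAMPLE_INTRINSICS_LIST(MAKE_ENUM_ENTRY)
    #undef MAKE_ENUM_ENTRY
      kCount
    };

    static_assert(static_cast<int>(ExampleIntrinsic::kCount) == 2, "two entries expanded");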
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index a146274ddb..66135414f7 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -105,7 +105,7 @@ JitCompiler::JitCompiler() {
/* implicit_null_checks */ true,
/* implicit_so_checks */ true,
/* implicit_suspend_checks */ false,
- /* pic */ true, // TODO: Support non-PIC in optimizing.
+ /* pic */ false,
/* verbose_methods */ nullptr,
/* init_failure_output */ nullptr,
/* abort_on_hard_verifier_failure */ false,
@@ -117,6 +117,9 @@ JitCompiler::JitCompiler() {
for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
compiler_options_->ParseCompilerOption(argument, Usage);
}
+ // JIT is never PIC, no matter what the runtime compiler options specify.
+ compiler_options_->SetNonPic();
+
const InstructionSet instruction_set = kRuntimeISA;
for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) {
VLOG(compiler) << "JIT compiler option " << option;
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index f55d5a6fb8..c1ac230d43 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -249,7 +249,7 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset,
// All remaining method call patches will be handled by this thunk.
DCHECK(!unprocessed_method_call_patches_.empty());
DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(),
- MaxPositiveDisplacement(ThunkType::kMethodCall));
+ MaxPositiveDisplacement(GetMethodCallKey()));
unprocessed_method_call_patches_.clear();
}
}
@@ -271,8 +271,8 @@ uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_
DCHECK(method_call_thunk_ != nullptr);
// Unsigned arithmetic with its well-defined overflow behavior is just fine here.
uint32_t displacement = target_offset - patch_offset;
- uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
- uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+ uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey());
+ uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey());
// NOTE: With unsigned arithmetic we do mean to use && rather than || below.
if (displacement > max_positive_displacement && displacement < -max_negative_displacement) {
// Unwritten thunks have higher offsets, check if it's within range.
@@ -299,29 +299,40 @@ uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint3
if (data.HasWrittenOffset()) {
uint32_t offset = data.LastWrittenOffset();
DCHECK_LT(offset, patch_offset);
- if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) {
+ if (patch_offset - offset <= MaxNegativeDisplacement(key)) {
return offset;
}
}
DCHECK(data.HasPendingOffset());
uint32_t offset = data.GetPendingOffset();
DCHECK_GT(offset, patch_offset);
- DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType()));
+ DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key));
return offset;
}
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() {
+ return ThunkKey(ThunkType::kMethodCall);
+}
+
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey(
+ const LinkerPatch& patch) {
+ DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
+ return ThunkKey(ThunkType::kBakerReadBarrier,
+ patch.GetBakerCustomValue1(),
+ patch.GetBakerCustomValue2());
+}
+
void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method,
uint32_t code_offset) {
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
uint32_t patch_offset = code_offset + patch.LiteralOffset();
- ThunkType key_type = static_cast<ThunkType>(-1);
+ ThunkKey key(static_cast<ThunkType>(-1));
ThunkData* old_data = nullptr;
if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
- key_type = ThunkType::kMethodCall;
+ key = GetMethodCallKey();
unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod());
if (method_call_thunk_ == nullptr) {
- ThunkKey key(key_type, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces)
- uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+ uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset));
method_call_thunk_ = &it->second;
AddUnreservedThunk(method_call_thunk_);
@@ -329,11 +340,10 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho
old_data = method_call_thunk_;
}
} else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
- ThunkKey key = GetBakerReadBarrierKey(patch);
- key_type = key.GetType();
+ key = GetBakerThunkKey(patch);
auto lb = thunks_.lower_bound(key);
if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) {
- uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+ uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset));
AddUnreservedThunk(&it->second);
} else {
@@ -342,16 +352,16 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho
}
if (old_data != nullptr) {
// Shared path where an old thunk may need an update.
- DCHECK(key_type != static_cast<ThunkType>(-1));
+ DCHECK(key.GetType() != static_cast<ThunkType>(-1));
DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset);
if (old_data->NeedsNextThunk()) {
// Patches for a method are ordered by literal offset, so if we still need to place
// this thunk for a previous patch, that thunk shall be in range for this patch.
- DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type));
+ DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key));
} else {
if (!old_data->HasReservedOffset() ||
- patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) {
- old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type));
+ patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) {
+ old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key));
AddUnreservedThunk(old_data);
}
}
@@ -385,8 +395,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset,
DCHECK(!unreserved_thunks_.empty());
DCHECK(!unprocessed_method_call_patches_.empty());
DCHECK(method_call_thunk_ != nullptr);
- uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
- uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+ uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey());
+ uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey());
// Process as many patches as possible, stop only on unresolved targets or calls too far back.
while (!unprocessed_method_call_patches_.empty()) {
MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod();
@@ -439,8 +449,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset,
}
inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset,
- ThunkType type) {
- return RoundDown(patch_offset + MaxPositiveDisplacement(type),
+ const ThunkKey& key) {
+ return RoundDown(patch_offset + MaxPositiveDisplacement(key),
GetInstructionSetAlignment(instruction_set_));
}
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 2cb1b6c535..5197ce2549 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -42,59 +42,30 @@ class ArmBaseRelativePatcher : public RelativePatcher {
enum class ThunkType {
kMethodCall, // Method call thunk.
- kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset.
- kBakerReadBarrierRoot, // Baker read barrier, GC root load.
- };
-
- struct BakerReadBarrierOffsetParams {
- uint32_t holder_reg; // Holder object for reading lock word.
- uint32_t base_reg; // Base register, different from holder for large offset.
- // If base differs from holder, it should be a pre-defined
- // register to limit the number of thunks we need to emit.
- // The offset is retrieved using introspection.
- };
-
- struct BakerReadBarrierRootParams {
- uint32_t root_reg; // The register holding the GC root.
- uint32_t dummy;
- };
-
- struct RawThunkParams {
- uint32_t first;
- uint32_t second;
- };
-
- union ThunkParams {
- RawThunkParams raw_params;
- BakerReadBarrierOffsetParams offset_params;
- BakerReadBarrierRootParams root_params;
+ kBakerReadBarrier, // Baker read barrier.
};
class ThunkKey {
public:
- ThunkKey(ThunkType type, ThunkParams params) : type_(type), params_(params) { }
+ explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u)
+ : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { }
ThunkType GetType() const {
return type_;
}
- BakerReadBarrierOffsetParams GetOffsetParams() const {
- DCHECK(type_ == ThunkType::kBakerReadBarrierField);
- return params_.offset_params;
+ uint32_t GetCustomValue1() const {
+ return custom_value1_;
}
- BakerReadBarrierRootParams GetRootParams() const {
- DCHECK(type_ == ThunkType::kBakerReadBarrierRoot);
- return params_.root_params;
- }
-
- RawThunkParams GetRawParams() const {
- return params_.raw_params;
+ uint32_t GetCustomValue2() const {
+ return custom_value2_;
}
private:
ThunkType type_;
- ThunkParams params_;
+ uint32_t custom_value1_;
+ uint32_t custom_value2_;
};
class ThunkKeyCompare {
@@ -103,13 +74,16 @@ class ArmBaseRelativePatcher : public RelativePatcher {
if (lhs.GetType() != rhs.GetType()) {
return lhs.GetType() < rhs.GetType();
}
- if (lhs.GetRawParams().first != rhs.GetRawParams().first) {
- return lhs.GetRawParams().first < rhs.GetRawParams().first;
+ if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) {
+ return lhs.GetCustomValue1() < rhs.GetCustomValue1();
}
- return lhs.GetRawParams().second < rhs.GetRawParams().second;
+ return lhs.GetCustomValue2() < rhs.GetCustomValue2();
}
};
+ static ThunkKey GetMethodCallKey();
+ static ThunkKey GetBakerThunkKey(const LinkerPatch& patch);
+
uint32_t ReserveSpaceInternal(uint32_t offset,
const CompiledMethod* compiled_method,
MethodReference method_ref,
@@ -119,10 +93,9 @@ class ArmBaseRelativePatcher : public RelativePatcher {
uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset,
uint32_t target_offset);
- virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0;
virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0;
- virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0;
- virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0;
+ virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0;
+ virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0;
private:
class ThunkData;
@@ -132,7 +105,7 @@ class ArmBaseRelativePatcher : public RelativePatcher {
void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref);
- uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type);
+ uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key);
RelativePatcherTargetProvider* const provider_;
const InstructionSet instruction_set_;
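The refactoring above replaces the per-kind parameter structs with two opaque custom values on ThunkKey; ThunkKeyCompare orders keys lexicographically by (type, custom_value1, custom_value2), so any two patches that need a bit-identical thunk land on the same map entry. A standalone sketch of that sharing, with std::map standing in for ART's container and the value packing left unspecified:

    #include <cstdint>
    #include <map>
    #include <vector>

    enum class ThunkType { kMethodCall, kBakerReadBarrier };

    struct Key {
      ThunkType type;
      uint32_t custom_value1;  // Baker thunks pack kind/register bits here; the encoding is up to the subclass.
      uint32_t custom_value2;
    };

    struct KeyCompare {
      bool operator()(const Key& lhs, const Key& rhs) const {
        if (lhs.type != rhs.type) {
          return lhs.type < rhs.type;
        }
        if (lhs.custom_value1 != rhs.custom_value1) {
          return lhs.custom_value1 < rhs.custom_value1;
        }
        return lhs.custom_value2 < rhs.custom_value2;
      }
    };

    // Equal keys share one compiled thunk; distinct keys get their own entry.
    std::map<Key, std::vector<uint8_t>, KeyCompare> thunks;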
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 1a5d79ce70..aa5a9457b2 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -16,9 +16,16 @@
#include "linker/arm/relative_patcher_thumb2.h"
+#include "arch/arm/asm_support_arm.h"
#include "art_method.h"
+#include "base/bit_utils.h"
#include "compiled_method.h"
-#include "utils/arm/assembler_thumb2.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "lock_word.h"
+#include "mirror/object.h"
+#include "mirror/array-inl.h"
+#include "read_barrier.h"
+#include "utils/arm/assembler_arm_vixl.h"
namespace art {
namespace linker {
@@ -32,6 +39,12 @@ static constexpr int32_t kPcDisplacement = 4;
constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement;
+// Maximum positive and negative displacement for a conditional branch measured from the patch
+// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured
+// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.)
+constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement;
+constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement;
+
Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider)
: ArmBaseRelativePatcher(provider, kThumb2) {
}
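Plugging numbers into the comment above (a quick check, not part of the patch): the encodable B.cond displacement from the PC is [-2^20, 2^20 - 2], and the Thumb2 PC sits 4 bytes past the patched location, so measured from the patch location the reach works out to:

    #include <cstdint>

    constexpr uint32_t kPcBias = 4u;                             // PC is 4 bytes past the B.cond.
    constexpr uint32_t kMaxForward = (1u << 20) - 2u + kPcBias;  // 1048578 bytes forward.
    constexpr uint32_t kMaxBackward = (1u << 20) - kPcBias;      // 1048572 bytes backward.

    static_assert(kMaxForward == 1048578u, "matches kMaxBcondPositiveDisplacement above");
    static_assert(kMaxBackward == 1048572u, "matches kMaxBcondNegativeDisplacement above");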
@@ -84,29 +97,259 @@ void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
SetInsn32(code, literal_offset, insn);
}
-void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
- const LinkerPatch& patch ATTRIBUTE_UNUSED,
- uint32_t patch_offset ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "UNIMPLEMENTED";
+void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) {
+ DCHECK_ALIGNED(patch_offset, 2u);
+ uint32_t literal_offset = patch.LiteralOffset();
+ DCHECK_ALIGNED(literal_offset, 2u);
+ DCHECK_LT(literal_offset, code->size());
+ uint32_t insn = GetInsn32(code, literal_offset);
+ DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched)
+ ThunkKey key = GetBakerThunkKey(patch);
+ if (kIsDebugBuild) {
+ const uint32_t encoded_data = key.GetCustomValue1();
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ // Check that the next instruction matches the expected LDR.
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+ // LDR (immediate), encoding T3, with correct base_reg.
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ } else {
+ DCHECK_GE(code->size() - literal_offset, 6u);
+ uint32_t next_insn = GetInsn16(code, literal_offset + 4u);
+ // LDR (immediate), encoding T1, with correct base_reg.
+ CheckValidReg(next_insn & 0x7u); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+ }
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+ // LDR (register), encoding T2, with correct base_reg and imm2 = 2 (i.e. LDR Rt, [Rn, Rm, LSL #2]).
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+ CheckValidReg(next_insn & 0xfu); // Check index register.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+ // LDR (immediate), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ } else {
+ DCHECK_GE(literal_offset, 2u);
+ uint32_t prev_insn = GetInsn16(code, literal_offset - 2u);
+ // LDR (immediate), encoding T1, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+ UNREACHABLE();
+ }
+ }
+ uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
+ DCHECK_ALIGNED(target_offset, 4u);
+ uint32_t disp = target_offset - (patch_offset + kPcDisplacement);
+ DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed.
+ insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S".
+ ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 11, "J2".
+ ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 13, "J1".
+ ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-21, "imm6".
+ ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-11 to 0-10, "imm11".
+ SetInsn32(code, literal_offset, insn);
}
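// A standalone sketch (not part of this patch): pack a signed, even displacement into the
// BNE.W (encoding T3) immediate fields exactly as done above, and decode it back.
// Field placement: S = disp bit 20 -> insn bit 26, J2 = bit 19 -> bit 11, J1 = bit 18 -> bit 13,
// imm6 = bits 12-17 -> bits 16-21, imm11 = bits 1-11 -> bits 0-10.
#include <cassert>
#include <cstdint>
#include <initializer_list>
uint32_t Pack(uint32_t insn, uint32_t disp) {
  return insn |
         ((disp << 6) & 0x04000000u) |   // S
         ((disp >> 8) & 0x00000800u) |   // J2
         ((disp >> 5) & 0x00002000u) |   // J1
         ((disp << 4) & 0x003f0000u) |   // imm6
         ((disp >> 1) & 0x000007ffu);    // imm11
}
int32_t Unpack(uint32_t insn) {
  uint32_t disp = ((insn >> 6) & (1u << 20)) |
                  ((insn << 8) & (1u << 19)) |
                  ((insn << 5) & (1u << 18)) |
                  ((insn >> 4) & 0x0003f000u) |
                  ((insn << 1) & 0x00000ffeu);
  return static_cast<int32_t>(disp << 11) >> 11;  // Sign-extend from bit 20.
}
int main() {
  constexpr uint32_t kBneWPlus0 = 0xf0408000u;  // The unpatched BNE checked above.
  for (int32_t disp : {0, 2, -2, 4096, -4096, (1 << 20) - 2, -(1 << 20)}) {
    assert(Unpack(Pack(kBneWPlus0, static_cast<uint32_t>(disp))) == disp);
  }
  return 0;
}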
-ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey(
- const LinkerPatch& patch ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "UNIMPLEMENTED";
- UNREACHABLE();
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
+ vixl::aarch32::Register base_reg,
+ vixl::aarch32::MemOperand& lock_word,
+ vixl::aarch32::Label* slow_path,
+ int32_t raw_ldr_offset) {
+ using namespace vixl::aarch32; // NOLINT(build/namespaces)
+ // Load the lock word containing the rb_state.
+ __ Ldr(ip, lock_word);
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
+ __ B(ne, slow_path, /* is_far_target */ false);
+ __ Add(lr, lr, raw_ldr_offset);
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
+ __ Bx(lr); // And return back to the function.
+ // Note: The fake dependency is unnecessary for the slow path.
+}
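// A standalone sketch (not part of this patch) of the arithmetic claim behind the fake
// dependency above: on AArch32, "ADD Rd, Rn, Rm, LSR #32" adds zero, so the base register
// value is unchanged while the subsequent load address now depends on the just-loaded lock
// word. (The 64-bit cast is only because shifting a 32-bit C++ value by 32 is undefined.)
#include <cassert>
#include <cstdint>
int main() {
  uint32_t lock_word = 0x12345678u;  // Whatever was just loaded into IP.
  uint32_t base_reg = 0x1000u;
  uint32_t shifted = static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
  assert(shifted == 0u);
  assert(base_reg + shifted == base_reg);  // Value preserved; only the dependency is added.
  return 0;
}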
+
+void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler,
+ uint32_t encoded_data) {
+ using namespace vixl::aarch32; // NOLINT(build/namespaces)
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ // Check if the holder is gray and, if not, add fake dependency to the base register
+ // and return to the LDR instruction to load the reference. Otherwise, use introspection
+ // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister)
+ // that performs further checks on the reference and marks it if needed.
+ Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ // If base_reg differs from holder_reg, the offset was too large and we must have
+ // emitted an explicit null check before the load. Otherwise, we need to null-check
+ // the holder as we do not necessarily do that check before going to the thunk.
+ vixl::aarch32::Label throw_npe;
+ if (holder_reg.Is(base_reg)) {
+ __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false);
+ }
+ vixl::aarch32::Label slow_path;
+ MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+ const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+ __ Bind(&slow_path);
+ const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+ raw_ldr_offset;
+ Register ep_reg(kBakerCcEntrypointRegister);
+ if (width == BakerReadBarrierWidth::kWide) {
+ MemOperand ldr_half_address(lr, ldr_offset + 2);
+ __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
+ __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
+ __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ } else {
+ MemOperand ldr_address(lr, ldr_offset);
+ __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
+ __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
+ ep_reg, // for narrow LDR.
+ Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+ __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
+ __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
+ }
+ // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+ __ Bx(ep_reg); // Jump to the entrypoint.
+ if (holder_reg.Is(base_reg)) {
+ // Add null check slow path. The stack map is at the address pointed to by LR.
+ __ Bind(&throw_npe);
+ int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value();
+ __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset));
+ __ Bx(ip);
+ }
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ vixl::aarch32::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+ const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+ __ Bind(&slow_path);
+ const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+ raw_ldr_offset;
+ MemOperand ldr_address(lr, ldr_offset + 2);
+ __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
+ // i.e. Rm+32 because the scale in imm2 is 2.
+ Register ep_reg(kBakerCcEntrypointRegister); // Insert ip into the entrypoint address to create
+ __ Bfi(ep_reg, ip, 3, 6); // a switch case target based on the index register.
+ __ Mov(ip, base_reg); // Move the base register to ip.
+ __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+ // and it does not have a forwarding address), call the correct introspection entrypoint;
+ // otherwise return the reference (or the extracted forwarding address).
+ // There is no gray bit check for GC roots.
+ Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(root_reg.GetCode());
+ DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ vixl::aarch32::Label return_label, not_marked, forwarding_address;
+ __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false);
+ MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+ __ Ldr(ip, lock_word);
+ __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+ __ B(eq, &not_marked);
+ __ Bind(&return_label);
+ __ Bx(lr);
+ __ Bind(&not_marked);
+ static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+ "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+ " the highest bits and the 'forwarding address' state to have all bits set");
+ __ Cmp(ip, Operand(0xc0000000));
+ __ B(hs, &forwarding_address);
+ // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
+ // to art_quick_read_barrier_mark_introspection_gc_roots.
+ Register ep_reg(kBakerCcEntrypointRegister);
+ int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+ __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
+ __ Mov(ip, root_reg);
+ __ Bx(ep_reg);
+ __ Bind(&forwarding_address);
+ __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
+ __ Bx(lr);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
}
std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
- DCHECK(key.GetType() == ThunkType::kMethodCall);
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
ArenaPool pool;
ArenaAllocator arena(&pool);
- arm::Thumb2Assembler assembler(&arena);
- assembler.LoadFromOffset(
- arm::kLoadWord, arm::PC, arm::R0,
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
- assembler.bkpt(0);
+ arm::ArmVIXLAssembler assembler(&arena);
+
+ switch (key.GetType()) {
+ case ThunkType::kMethodCall:
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ assembler.LoadFromOffset(
+ arm::kLoadWord,
+ vixl::aarch32::pc,
+ vixl::aarch32::r0,
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+ __ Bkpt(0);
+ break;
+ case ThunkType::kBakerReadBarrier:
+ CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
+ break;
+ }
+
assembler.FinalizeCode();
std::vector<uint8_t> thunk_code(assembler.CodeSize());
MemoryRegion code(thunk_code.data(), thunk_code.size());
@@ -114,19 +357,29 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
return thunk_code;
}
-uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
- DCHECK(type == ThunkType::kMethodCall);
- return kMaxMethodCallPositiveDisplacement;
+#undef __
+
+uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
+ switch (key.GetType()) {
+ case ThunkType::kMethodCall:
+ return kMaxMethodCallPositiveDisplacement;
+ case ThunkType::kBakerReadBarrier:
+ return kMaxBcondPositiveDisplacement;
+ }
}
-uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
- DCHECK(type == ThunkType::kMethodCall);
- return kMaxMethodCallNegativeDisplacement;
+uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
+ switch (key.GetType()) {
+ case ThunkType::kMethodCall:
+ return kMaxMethodCallNegativeDisplacement;
+ case ThunkType::kBakerReadBarrier:
+ return kMaxBcondNegativeDisplacement;
+ }
}
void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
DCHECK_LE(offset + 4u, code->size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
uint8_t* addr = &(*code)[offset];
addr[0] = (value >> 16) & 0xff;
addr[1] = (value >> 24) & 0xff;
@@ -136,7 +389,7 @@ void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offse
uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
DCHECK_LE(offset + 4u, code.size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
const uint8_t* addr = &code[offset];
return
(static_cast<uint32_t>(addr[0]) << 16) +
@@ -151,5 +404,18 @@ uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) {
return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
}
+uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) {
+ DCHECK_LE(offset + 2u, code.size());
+ DCHECK_ALIGNED(offset, 2u);
+ const uint8_t* addr = &code[offset];
+ return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8);
+}
+
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) {
+ static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+ return GetInsn16(ArrayRef<const uint8_t>(*code), offset);
+}
+
} // namespace linker
} // namespace art
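// A standalone sketch (not part of this patch) of the byte order assumed by SetInsn32(),
// GetInsn32() and GetInsn16() above: a 32-bit Thumb2 instruction is stored as two
// little-endian half-words, with the leading half-word at the lower address.
#include <cassert>
#include <cstdint>
#include <vector>
int main() {
  const uint32_t insn = 0xf0408000u;  // Unpatched BNE +0.
  std::vector<uint8_t> code = {
      static_cast<uint8_t>(insn >> 16),  // 0x40
      static_cast<uint8_t>(insn >> 24),  // 0xf0
      static_cast<uint8_t>(insn),        // 0x00
      static_cast<uint8_t>(insn >> 8),   // 0x80
  };
  uint32_t read = (static_cast<uint32_t>(code[0]) << 16) |
                  (static_cast<uint32_t>(code[1]) << 24) |
                  (static_cast<uint32_t>(code[2]) << 0) |
                  (static_cast<uint32_t>(code[3]) << 8);
  assert(read == insn);
  uint32_t first_halfword = (static_cast<uint32_t>(code[0]) << 0) |
                            (static_cast<uint32_t>(code[1]) << 8);
  assert(first_halfword == 0xf040u);  // What GetInsn16() would return at offset 0.
  return 0;
}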
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index ab37802d0f..183e5e6c96 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -17,13 +17,57 @@
#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
+#include "arch/arm/registers_arm.h"
+#include "base/array_ref.h"
+#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "linker/arm/relative_patcher_arm_base.h"
namespace art {
+
+namespace arm {
+class ArmVIXLAssembler;
+} // namespace arm
+
namespace linker {
class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
public:
+ static constexpr uint32_t kBakerCcEntrypointRegister = 4u;
+
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ DCHECK(!narrow || base_reg < 8u) << base_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg) |
+ BakerReadBarrierWidthField::Encode(width);
+ }
+
+ static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+ }
+
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
+ CheckValidReg(root_reg);
+ DCHECK(!narrow || root_reg < 8u) << root_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(width);
+ }
+
explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
void PatchCall(std::vector<uint8_t>* code,
@@ -39,18 +83,58 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
uint32_t patch_offset) OVERRIDE;
protected:
- ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
- uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
- uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
+ uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE;
+ uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE;
private:
+ static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u;
+
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kLast = kGcRoot
+ };
+
+ enum class BakerReadBarrierWidth : uint8_t {
+ kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+ kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+ kLast = kNarrow
+ };
+
+ static constexpr size_t kBitsForBakerReadBarrierKind =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+ static constexpr size_t kBitsForRegister = 4u;
+ using BakerReadBarrierKindField =
+ BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+ using BakerReadBarrierFirstRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
+ using BakerReadBarrierSecondRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
+ static constexpr size_t kBitsForBakerReadBarrierWidth =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+ using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth,
+ kBitsForBakerReadBarrierKind + 2 * kBitsForRegister,
+ kBitsForBakerReadBarrierWidth>;
+
+ static void CheckValidReg(uint32_t reg) {
+ DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg;
+ }
+
+ void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data);
+
void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
template <typename Vector>
static uint32_t GetInsn32(Vector* code, uint32_t offset);
+ static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset);
+
+ template <typename Vector>
+ static uint32_t GetInsn16(Vector* code, uint32_t offset);
+
friend class Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
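// A standalone sketch (not part of this patch) of the custom-value layout produced by the
// Encode* helpers above, re-deriving what the BitField aliases compute: with 2 bits for the
// kind (kGcRoot == 2), the layout from bit 0 is [width:1][second_reg:4][first_reg:4][kind:2].
// The register numbers below are arbitrary examples.
#include <cassert>
#include <cstdint>
int main() {
  constexpr uint32_t kKindBits = 2u;   // MinimumBitsToStore(kGcRoot) with kGcRoot == 2.
  constexpr uint32_t kRegBits = 4u;
  constexpr uint32_t kKindField = 0u;  // BakerReadBarrierKind::kField.
  constexpr uint32_t kWide = 0u;       // BakerReadBarrierWidth::kWide.
  uint32_t base_reg = 3u;
  uint32_t holder_reg = 5u;
  uint32_t encoded = kKindField |
                     (base_reg << kKindBits) |
                     (holder_reg << (kKindBits + kRegBits)) |
                     (kWide << (kKindBits + 2u * kRegBits));
  assert((encoded & 0x3u) == kKindField);                              // Kind.
  assert(((encoded >> kKindBits) & 0xfu) == base_reg);                 // First register.
  assert(((encoded >> (kKindBits + kRegBits)) & 0xfu) == holder_reg);  // Second register.
  assert(((encoded >> (kKindBits + 2u * kRegBits)) & 0x1u) == kWide);  // Width.
  return 0;
}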
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index f08270d934..52e27afcf5 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -14,8 +14,12 @@
* limitations under the License.
*/
+#include "base/casts.h"
#include "linker/relative_patcher_test.h"
#include "linker/arm/relative_patcher_thumb2.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+#include "mirror/object.h"
#include "oat_quick_method_header.h"
namespace art {
@@ -34,13 +38,102 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode;
static const uint32_t kPcInsnOffset;
+ // The PC in Thumb mode is 4 bytes after the instruction location.
+ static constexpr uint32_t kPcAdjustment = 4u;
+
// Branches within range [-256, 256) can be created from these by adding the low 8 bits.
- static constexpr uint32_t kBlPlus0 = 0xf000f800;
- static constexpr uint32_t kBlMinus256 = 0xf7ffff00;
+ static constexpr uint32_t kBlPlus0 = 0xf000f800u;
+ static constexpr uint32_t kBlMinus256 = 0xf7ffff00u;
// Special BL values.
- static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff;
- static constexpr uint32_t kBlMinusMax = 0xf400d000;
+ static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu;
+ static constexpr uint32_t kBlMinusMax = 0xf400d000u;
+
+ // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset.
+ static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+
+ // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn.
+ static constexpr uint32_t kLdrInsn = 0x6800u;
+
+ // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
+ static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
+
+ // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract.
+ static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u;
+
+ // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift.
+ static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u;
+
+ // NOP instructions.
+ static constexpr uint32_t kNopInsn = 0xbf00u;
+ static constexpr uint32_t kNopWInsn = 0xf3af8000u;
+
+ void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
+ CHECK_LE(pos, code->size());
+ if (IsUint<16>(insn)) {
+ const uint8_t insn_code[] = {
+ static_cast<uint8_t>(insn),
+ static_cast<uint8_t>(insn >> 8),
+ };
+ static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code).");
+ code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+ } else {
+ const uint8_t insn_code[] = {
+ static_cast<uint8_t>(insn >> 16),
+ static_cast<uint8_t>(insn >> 24),
+ static_cast<uint8_t>(insn),
+ static_cast<uint8_t>(insn >> 8),
+ };
+ static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
+ code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+ }
+ }
+
+ void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) {
+ InsertInsn(code, code->size(), insn);
+ }
+
+ std::vector<uint8_t> GenNops(size_t num_nops) {
+ std::vector<uint8_t> result;
+ result.reserve(num_nops * 2u);
+ for (size_t i = 0; i != num_nops; ++i) {
+ PushBackInsn(&result, kNopInsn);
+ }
+ return result;
+ }
+
+ std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) {
+ std::vector<uint8_t> raw_code;
+ size_t number_of_16_bit_insns =
+ std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); });
+ raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u);
+ for (uint32_t insn : insns) {
+ PushBackInsn(&raw_code, insn);
+ }
+ return raw_code;
+ }
+
+ uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) {
+ if (!IsAligned<2u>(bne_offset)) {
+ LOG(ERROR) << "Unaligned bne_offset: " << bne_offset;
+ return 0xffffffffu; // Fails code diff later.
+ }
+ if (!IsAligned<2u>(target_offset)) {
+ LOG(ERROR) << "Unaligned target_offset: " << target_offset;
+ return 0xffffffffu; // Fails code diff later.
+ }
+ uint32_t diff = target_offset - bne_offset - kPcAdjustment;
+ DCHECK_ALIGNED(diff, 2u);
+ if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) {
+ LOG(ERROR) << "Target out of range: " << diff;
+ return 0xffffffffu; // Fails code diff later.
+ }
+ return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11
+ | (((diff >> 12) & 0x3fu) << 16) // imm6
+ | (((diff >> 18) & 1) << 13) // J1
+ | (((diff >> 19) & 1) << 11) // J2
+ | (((diff >> 20) & 1) << 26); // S
+ }
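// A standalone sketch (not part of this patch) of the size accounting behind RawCode() and
// the kLiteralOffset constants used in the tests below: 16-bit encodings occupy 2 bytes and
// 32-bit encodings occupy 4, so in {kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn} the BNE
// starts at offset 4 + 2 = 6.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
size_t CodeSize(std::initializer_list<uint32_t> insns) {
  size_t size = 0u;
  for (uint32_t insn : insns) {
    size += (insn <= 0xffffu) ? 2u : 4u;  // Same distinction as IsUint<16>(insn) above.
  }
  return size;
}
int main() {
  assert(CodeSize({0xf3af8000u, 0xbf00u, 0xf0408000u, 0xf8d00000u}) == 14u);  // Whole method.
  assert(CodeSize({0xf3af8000u, 0xbf00u}) == 6u);  // Offset of the BNE, i.e. kLiteralOffset1.
  return 0;
}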
bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
const ArrayRef<const LinkerPatch>& method1_patches,
@@ -95,9 +188,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
}
std::vector<uint8_t> CompileMethodCallThunk() {
- ArmBaseRelativePatcher::ThunkKey key(
- ArmBaseRelativePatcher::ThunkType::kMethodCall,
- ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces)
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey();
return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -125,19 +216,57 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
std::vector<uint8_t> result;
result.reserve(num_nops * 2u + 4u);
for (size_t i = 0; i != num_nops; ++i) {
- result.push_back(0x00);
- result.push_back(0xbf);
+ PushBackInsn(&result, kNopInsn);
}
- result.push_back(static_cast<uint8_t>(bl >> 16));
- result.push_back(static_cast<uint8_t>(bl >> 24));
- result.push_back(static_cast<uint8_t>(bl));
- result.push_back(static_cast<uint8_t>(bl >> 8));
+ PushBackInsn(&result, bl);
return result;
}
- void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+ void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
void TestStringReference(uint32_t string_offset);
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+
+ std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
+ const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow));
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+ return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+ }
+
+ std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+ return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+ }
+
+ std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow));
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+ return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+ }
+
+ uint32_t GetOutputInsn32(uint32_t offset) {
+ CHECK_LE(offset, output_.size());
+ CHECK_GE(output_.size() - offset, 4u);
+ return (static_cast<uint32_t>(output_[offset]) << 16) |
+ (static_cast<uint32_t>(output_[offset + 1]) << 24) |
+ (static_cast<uint32_t>(output_[offset + 2]) << 0) |
+ (static_cast<uint32_t>(output_[offset + 3]) << 8);
+ }
+
+ uint16_t GetOutputInsn16(uint32_t offset) {
+ CHECK_LE(offset, output_.size());
+ CHECK_GE(output_.size() - offset, 2u);
+ return (static_cast<uint32_t>(output_[offset]) << 0) |
+ (static_cast<uint32_t>(output_[offset + 1]) << 8);
+ }
+
+ void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -161,21 +290,22 @@ const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCod
kUnpatchedPcRelativeRawCode);
const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u;
-void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
- dex_cache_arrays_begin_ = dex_cache_arrays_begin;
- LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
- LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
+void Thumb2RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+ uint32_t string_entry_offset) {
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+ bss_begin_ = bss_begin;
+ const LinkerPatch patches[] = {
+ LinkerPatch::StringBssEntryPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
+ LinkerPatch::StringBssEntryPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
};
- CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
- dex_cache_arrays_begin_ + element_offset);
+ CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
}
void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
constexpr uint32_t kStringIndex = 1u;
string_index_to_offset_map_.Put(kStringIndex, string_offset);
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
};
@@ -214,7 +344,7 @@ void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker
}
TEST_F(Thumb2RelativePatcherTest, CallSelf) {
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -227,11 +357,11 @@ TEST_F(Thumb2RelativePatcherTest, CallSelf) {
}
TEST_F(Thumb2RelativePatcherTest, CallOther) {
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
- LinkerPatch method2_patches[] = {
+ const LinkerPatch method2_patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
};
AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
@@ -254,7 +384,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOther) {
}
TEST_F(Thumb2RelativePatcherTest, CallTrampoline) {
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -274,7 +404,7 @@ TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) {
constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs.
ArrayRef<const uint8_t> method3_code(method3_raw_code);
ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
- LinkerPatch method3_patches[] = {
+ const LinkerPatch method3_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
};
@@ -303,7 +433,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs.
ArrayRef<const uint8_t> method1_code(method1_raw_code);
ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
};
@@ -325,7 +455,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) {
constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs.
ArrayRef<const uint8_t> method3_code(method3_raw_code);
ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
- LinkerPatch method3_patches[] = {
+ const LinkerPatch method3_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
};
@@ -347,7 +477,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) {
constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs.
ArrayRef<const uint8_t> method1_code(method1_raw_code);
ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
};
@@ -382,7 +512,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) {
constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs.
ArrayRef<const uint8_t> method3_code(method3_raw_code);
ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
- LinkerPatch method3_patches[] = {
+ const LinkerPatch method3_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
};
@@ -405,23 +535,23 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) {
EXPECT_TRUE(CheckThunk(thunk_offset));
}
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference1) {
- TestDexCacheReference(0x00ff0000u, 0x00fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry1) {
+ TestStringBssEntry(0x00ff0000u, 0x00fcu);
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference2) {
- TestDexCacheReference(0x02ff0000u, 0x05fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry2) {
+ TestStringBssEntry(0x02ff0000u, 0x05fcu);
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference3) {
- TestDexCacheReference(0x08ff0000u, 0x08fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry3) {
+ TestStringBssEntry(0x08ff0000u, 0x08fcu);
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference4) {
- TestDexCacheReference(0xd0ff0000u, 0x60fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry4) {
+ TestStringBssEntry(0xd0ff0000u, 0x60fcu);
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
@@ -445,5 +575,710 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) {
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
+void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 4 * KB);
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ for (uint32_t holder_reg : valid_regs) {
+ uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ false);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check uses the correct register, i.e. holder_reg.
+ if (holder_reg < 8) {
+ ASSERT_GE(output_.size() - gray_check_offset, 2u);
+ ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ gray_check_offset += 2u;
+ } else {
+ ASSERT_GE(output_.size() - gray_check_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ gray_check_offset += 6u;
+ }
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ ASSERT_GE(output_.size() - gray_check_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (holder_reg << 16) |
+ (/* IP */ 12 << 12) |
+ mirror::Object::MonitorOffset().Uint32Value();
+ ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+ }
+}
+
+void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 32u);
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ true);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check uses the correct register, i.e. holder_reg.
+ if (holder_reg < 8) {
+ ASSERT_GE(output_.size() - gray_check_offset, 2u);
+ ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ gray_check_offset += 2u;
+ } else {
+ ASSERT_GE(output_.size() - gray_check_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ gray_check_offset += 6u;
+ }
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ ASSERT_GE(output_.size() - gray_check_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (holder_reg << 16) |
+ (/* IP */ 12 << 12) |
+ mirror::Object::MonitorOffset().Uint32Value();
+ ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+ }
+}
+
+#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetWide##offset##_##ref_reg) { \
+ TestBakerFieldWide(offset, ref_reg); \
+ }
+
+TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11)
+
+#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetNarrow##offset##_##ref_reg) { \
+ TestBakerFieldNarrow(offset, ref_reg); \
+ }
+
+TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7)
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
+ // One thunk in the middle with maximum distance branches to it from both sides.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 6u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ constexpr uint32_t expected_thunk_offset =
+ kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+ static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+ size_t filler1_size = expected_thunk_offset -
+ RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ // Enforce thunk reservation with a tiny method.
+ AddCompiledMethod(MethodRef(3u), kNopCode);
+
+ constexpr uint32_t kLiteralOffset2 = 4;
+ static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment),
+ "PC for BNE must be aligned.");
+
+ // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch
+ // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract:
+ // - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+ // - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+ // - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
+ size_t filler2_size =
+ 1 * MB - (kLiteralOffset2 + kPcAdjustment)
+ - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+ ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+ AddCompiledMethod(MethodRef(4u), filler2_code);
+
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code2(raw_code2);
+ const LinkerPatch patches2[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+ Link();
+
+ uint32_t first_method_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(5u);
+ EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+ const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+ const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000;
+ const std::vector<uint8_t> expected_code1 =
+ RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+ const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
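// A standalone sketch (not part of this patch) of the 2 MiB distance asserted above: the
// thunk sits at the farthest address reachable forward from method1's BNE (at offset 6,
// PC adjustment +4), and method5's BNE (at offset 4, PC adjustment +4) reaches it at the
// farthest backward distance, so the two method offsets must differ by exactly 2^21 bytes.
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t method1 = 0x1000u;  // Arbitrary example offset.
  const uint32_t thunk = method1 + 6u + 4u + ((1u << 20) - 2u);  // Max forward reach.
  const uint32_t method5_pc = thunk + (1u << 20);                // Max backward reach.
  const uint32_t method5 = method5_pc - 4u /* kPcAdjustment */ - 4u /* kLiteralOffset2 */;
  assert(method5 - method1 == 2u * 1024u * 1024u);  // Matches EXPECT_EQ(2 * MB, ...).
  return 0;
}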
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) {
+ // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction
+ // earlier, so the thunk is emitted before the filler.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 4u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ constexpr uint32_t expected_thunk_offset =
+ kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20);
+ static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+ size_t filler1_size = expected_thunk_offset -
+ RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ Link();
+
+ const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment));
+ const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
+ // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded
+ // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 6u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ constexpr uint32_t expected_thunk_offset =
+ kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+ static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+ size_t filler1_size = expected_thunk_offset -
+ RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ // Enforce thunk reservation with a tiny method.
+ AddCompiledMethod(MethodRef(3u), kNopCode);
+
+ constexpr uint32_t kReachableFromOffset2 = 4;
+ constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2;
+ static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment),
+ "PC for BNE must be aligned.");
+
+ // If not for the extra NOP, this would allow reaching the thunk from the BNE
+ // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take
+ // PC adjustment into account. Things to subtract:
+ // - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+ // - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+ // - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
+ size_t filler2_size =
+ 1 * MB - (kReachableFromOffset2 + kPcAdjustment)
+ - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+ ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+ AddCompiledMethod(MethodRef(4u), filler2_code);
+
+ // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle.
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code2(raw_code2);
+ const LinkerPatch patches2[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+ Link();
+
+ uint32_t first_method_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(5u);
+ EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+ const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+ const uint32_t bne_last =
+ BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment));
+ const std::vector<uint8_t> expected_code1 =
+ RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+ const std::vector<uint8_t> expected_code2 =
+ RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerArray) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ auto ldr = [](uint32_t base_reg) {
+ uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+ uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
+ return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12);
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the lock word for gray bit check is loaded from the correct address
+ // before the base_reg which points to the array data.
+ ASSERT_GE(output_.size() - thunk_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+ ASSERT_LT(offset, 0);
+ ASSERT_GT(offset, -256);
+ const uint32_t load_lock_word =
+ kLdrNegativeOffset |
+ (-offset & 0xffu) |
+ (base_reg << 16) |
+ (/* IP */ 12 << 12);
+ EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency.
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 4u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+ const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
+ if (root_reg < 8) {
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ } else {
+ ASSERT_GE(output_.size() - thunk_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ }
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
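The assertions above distinguish two shapes of the thunk's fast-path null check: a low root register (R0-R7) is checked with a 16-bit CBZ (matched against 0xb100 under the mask 0xfd07), while a high register needs a 32-bit CMP #0 (the 0xf1b00f00 pattern) followed by a 16-bit BEQ. A small sketch of that classification, reusing only the patterns the test itself asserts:

#include <cstdint>

// Expected shape of the thunk's fast-path null check for a given root
// register, mirroring the patterns asserted in BakerGcRootWide above.
struct NullCheckShape {
  bool narrow;        // true: 16-bit CBZ; false: 32-bit CMP #0 followed by BEQ.
  uint32_t pattern;   // Expected bits before applying the test's mask.
};

constexpr NullCheckShape ExpectedNullCheck(uint32_t root_reg) {
  return (root_reg < 8u)
      ? NullCheckShape{true, 0xb100u | root_reg}
      : NullCheckShape{false, 0xf1b00f00u | (root_reg << 16)};
}

static_assert(ExpectedNullCheck(3u).narrow, "low registers use CBZ");
static_assert(!ExpectedNullCheck(10u).narrow, "high registers use CMP/BEQ");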
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ // Not applicable to high registers.
+ };
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 2u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
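The narrow test composes the 16-bit Thumb LDR by shifting the byte offset into the imm5 field, which counts 4-byte words and sits at bit 6 (hence the `<< (6 - 2)`); the base register occupies bits 3-5 and the destination bits 0-2. A standalone sketch of that composition; kLdr16Base stands in for the fixture's kLdrInsn constant, which is defined earlier in the file and not shown here.

#include <cstdint>

// Illustrative stand-in for the fixture's 16-bit kLdrInsn constant
// (Thumb LDR (immediate), encoding T1).
constexpr uint32_t kLdr16Base = 0x6800u;

// imm5 (byte offset / 4) at bits 6-10, Rn at bits 3-5, Rt at bits 0-2.
constexpr uint32_t EncodeLdr16(uint32_t rn, uint32_t rt, uint32_t byte_offset) {
  return kLdr16Base | ((byte_offset / 4u) << 6) | (rn << 3) | rt;
}

// Matches the test's `kLdrInsn | (8 << (6 - 2)) | (0 << 3) | root_reg` form:
// shifting the byte offset left by (6 - 2) is the same as placing offset/4
// at bit 6 when the offset is a multiple of 4.
static_assert(EncodeLdr16(0u, 7u, 8u) == (kLdr16Base | (8u << (6 - 2)) | 7u),
              "equivalent compositions");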
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) {
+ // Test 1MiB of patches to the same thunk to stress-test different large offsets.
+ // (The low bits are not that important but the location of the high bits is easy to get wrong.)
+ std::vector<uint8_t> code;
+ code.reserve(1 * MB);
+ const size_t num_patches = 1 * MB / 8u;
+ std::vector<LinkerPatch> patches;
+ patches.reserve(num_patches);
+ const uint32_t ldr =
+ kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false);
+ for (size_t i = 0; i != num_patches; ++i) {
+ PushBackInsn(&code, ldr);
+ PushBackInsn(&code, kBneWPlus0);
+ patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+ }
+ ASSERT_EQ(1 * MB, code.size());
+ ASSERT_EQ(num_patches, patches.size());
+ AddCompiledMethod(MethodRef(1u),
+ ArrayRef<const uint8_t>(code),
+ ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ // The thunk is right after the method code.
+ DCHECK_ALIGNED(1 * MB, kArmAlignment);
+ std::vector<uint8_t> expected_code;
+ for (size_t i = 0; i != num_patches; ++i) {
+ PushBackInsn(&expected_code, ldr);
+ PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB));
+ patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+ }
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
+ // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());`
+ // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily
+ // hold when we're reserving thunks of different sizes. This test exposes the situation
+ // by using Baker thunks and a method call thunk.
+
+ // Add a method call patch that can reach to method 1 offset + 16MiB.
+ uint32_t method_idx = 0u;
+ constexpr size_t kMethodCallLiteralOffset = 2u;
+ constexpr uint32_t kMissingMethodIdx = 2u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
+ const LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
+ };
+ ArrayRef<const uint8_t> code1(raw_code1);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
+
+ // Skip kMissingMethodIdx.
+ ++method_idx;
+ ASSERT_EQ(kMissingMethodIdx, method_idx);
+ // Add a method with the right size so that the code for the next method starts 1MiB
+ // after the code for method 1.
+ size_t filler_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
+ // before the currently scheduled MaxNextOffset() for the method call thunk.
+ for (uint32_t i = 0; i != 14; ++i) {
+ filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
+ filler_code = GenNops(filler_size / 2u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ }
+
+ // Add 2 Baker GC root patches to the last method, one that would allow the thunk at
+ // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and a second
+ // one that needs it kArmAlignment after that. Since the GC root thunk is larger than
+ // the space required by the method call thunk plus kArmAlignment, this pushes the
+ // first GC root thunk's pending MaxNextOffset() before the method call thunk's
+ // pending MaxNextOffset(), which therefore needs to be adjusted.
+ ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
+ CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
+ static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
+ constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
+ constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
+ // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
+ const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
+ const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
+ const std::vector<uint8_t> last_method_raw_code = RawCode({
+ kNopInsn, // Padding before first GC root read barrier.
+ ldr1, kBneWPlus0, // First GC root LDR with read barrier.
+ ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
+ });
+ uint32_t encoded_data1 =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
+ uint32_t encoded_data2 =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
+ const LinkerPatch last_method_patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
+ LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx),
+ ArrayRef<const uint8_t>(last_method_raw_code),
+ ArrayRef<const LinkerPatch>(last_method_patches));
+
+ // The main purpose of the test is to check that Link() does not cause a crash.
+ Link();
+
+ ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u));
+}
+
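The final assertion checks that the last method's code starts exactly 15MiB after method 1's: method 1 plus its filler fill the first 1MiB, and the 14 fillers of 1MiB (code plus OatQuickMethodHeader) each account for the remaining 14MiB. A quick arithmetic sketch of that layout under the same assumptions:

#include <cstdint>

constexpr uint32_t MB = 1024u * 1024u;

// Offsets of the method code relative to method 1's code, under the layout
// constructed by the test: method 1 + its filler occupy the first 1 MiB,
// then 14 fillers of exactly 1 MiB (code + header) each, so the last
// method's code starts at 15 MiB.
constexpr uint32_t kFirstFillerEnd = 1u * MB;  // After method 1 + filler.
constexpr uint32_t kLastMethodStart = kFirstFillerEnd + 14u * MB;

static_assert(kLastMethodStart == 15u * MB, "matches the ASSERT_EQ above");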
} // namespace linker
} // namespace art
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 551c73b2a4..117684a66b 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -29,6 +29,7 @@
#include "mirror/array-inl.h"
#include "oat.h"
#include "oat_quick_method_header.h"
+#include "read_barrier.h"
#include "utils/arm64/assembler_arm64.h"
namespace art {
@@ -53,13 +54,11 @@ constexpr uint32_t kAdrpThunkSize = 8u;
inline bool IsAdrpPatch(const LinkerPatch& patch) {
switch (patch.GetType()) {
- case LinkerPatch::Type::kMethod:
case LinkerPatch::Type::kCall:
case LinkerPatch::Type::kCallRelative:
- case LinkerPatch::Type::kType:
- case LinkerPatch::Type::kString:
case LinkerPatch::Type::kBakerReadBarrierBranch:
return false;
+ case LinkerPatch::Type::kMethodRelative:
case LinkerPatch::Type::kTypeRelative:
case LinkerPatch::Type::kTypeBssEntry:
case LinkerPatch::Type::kStringRelative:
@@ -251,11 +250,13 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
if ((insn & 0xfffffc00) == 0x91000000) {
// ADD immediate, 64-bit with imm12 == 0 (unset).
if (!kEmitCompilerReadBarrier) {
- DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+ DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
+ patch.GetType() == LinkerPatch::Type::kStringRelative ||
patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
} else {
// With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry.
- DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+ DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
+ patch.GetType() == LinkerPatch::Type::kStringRelative ||
patch.GetType() == LinkerPatch::Type::kTypeRelative ||
patch.GetType() == LinkerPatch::Type::kStringBssEntry ||
patch.GetType() == LinkerPatch::Type::kTypeBssEntry) << patch.GetType();
@@ -304,27 +305,42 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod
DCHECK_LT(literal_offset, code->size());
uint32_t insn = GetInsn(code, literal_offset);
DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched)
- ThunkKey key = GetBakerReadBarrierKey(patch);
+ ThunkKey key = GetBakerThunkKey(patch);
if (kIsDebugBuild) {
+ const uint32_t encoded_data = key.GetCustomValue1();
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
// Check that the next instruction matches the expected LDR.
- switch (key.GetType()) {
- case ThunkType::kBakerReadBarrierField: {
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
DCHECK_GE(code->size() - literal_offset, 8u);
uint32_t next_insn = GetInsn(code, literal_offset + 4u);
// LDR (immediate) with correct base_reg.
CheckValidReg(next_insn & 0x1fu); // Check destination register.
- CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5));
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
break;
}
- case ThunkType::kBakerReadBarrierRoot: {
+ case BakerReadBarrierKind::kArray: {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+ // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+ // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
+ CheckValidReg((next_insn >> 16) & 0x1f); // Check index register
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
DCHECK_GE(literal_offset, 4u);
uint32_t prev_insn = GetInsn(code, literal_offset - 4u);
// LDR (immediate) with correct root_reg.
- CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg);
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);
break;
}
default:
- LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
UNREACHABLE();
}
}
@@ -336,40 +352,6 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod
SetInsn(code, literal_offset, insn);
}
-ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey(
- const LinkerPatch& patch) {
- DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
- uint32_t value = patch.GetBakerCustomValue1();
- BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value);
- ThunkParams params;
- switch (type) {
- case BakerReadBarrierKind::kField:
- params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
- CheckValidReg(params.offset_params.base_reg);
- params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
- CheckValidReg(params.offset_params.holder_reg);
- break;
- case BakerReadBarrierKind::kGcRoot:
- params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
- CheckValidReg(params.root_params.root_reg);
- params.root_params.dummy = 0u;
- DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
- break;
- default:
- LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type);
- UNREACHABLE();
- }
- constexpr uint8_t kTypeTranslationOffset = 1u;
- static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
- static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
- "Thunk type translation check.");
- static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
- static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
- "Thunk type translation check.");
- return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset),
- params);
-}
-
#define __ assembler.GetVIXLAssembler()->
static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
@@ -394,33 +376,27 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32));
+ __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
__ Br(lr); // And return back to the function.
// Note: The fake dependency is unnecessary for the slow path.
}
-std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler,
+ uint32_t encoded_data) {
using namespace vixl::aarch64; // NOLINT(build/namespaces)
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- arm64::Arm64Assembler assembler(&arena);
-
- switch (key.GetType()) {
- case ThunkType::kMethodCall: {
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
- Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64PointerSize).Int32Value());
- assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
- break;
- }
- case ThunkType::kBakerReadBarrierField: {
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
// Check if the holder is gray and, if not, add fake dependency to the base register
// and return to the LDR instruction to load the reference. Otherwise, use introspection
// to load the reference and call the entrypoint (in IP1) that performs further checks
// on the reference and marks it if needed.
- auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
- auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ auto holder_reg =
+ Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip0, ip1);
// If base_reg differs from holder_reg, the offset was too large and we must have
@@ -444,17 +420,43 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
// Add null check slow path. The stack map is at the address pointed to by LR.
__ Bind(&throw_npe);
int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
- __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+ __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
__ Br(ip0);
}
break;
}
- case ThunkType::kBakerReadBarrierRoot: {
+ case BakerReadBarrierKind::kArray: {
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffset(), 0);
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
+ __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
+ __ Bfi(ip1, ip0, 3, 6); // Insert ip0 into the entrypoint address to create
+ // a switch case target based on the index register.
+ __ Mov(ip0, base_reg); // Move the base register to ip0.
+ __ Br(ip1); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
// Check if the reference needs to be marked and if so (i.e. not null, not marked yet
// and it does not have a forwarding address), call the correct introspection entrypoint;
// otherwise return the reference (or the extracted forwarding address).
// There is no gray bit check for GC roots.
- auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg);
+ auto root_reg =
+ Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(root_reg.GetCode());
+ DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip0, ip1);
vixl::aarch64::Label return_label, not_marked, forwarding_address;
@@ -477,6 +479,30 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
__ Br(lr);
break;
}
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+}
+
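In the kArray case above, the slow path re-reads the LDR (register) instruction at the return address, extracts its index-register field (UBFX of bits 16-21, which yields index_reg + 32 because bit 21 is set in this encoding) and BFI-inserts it at bit 3 of the entrypoint address in IP1, selecting an 8-byte-spaced switch case keyed on the index register. A rough sketch of that address arithmetic follows; the assumption that bits 3-8 of the entrypoint base are available for this purpose is a property of the runtime's introspection entrypoints and is not shown in this diff.

#include <cstdint>

// Sketch of the switch-case target computed by the kArray thunk:
//   UBFX ip0, ip0, #16, #6  -> index_reg + 32 (bit 21 of the LDR is set)
//   BFI  ip1, ip0, #3, #6   -> overwrite bits 3-8 of the entrypoint address
// This assumes the introspection entrypoint's switch cases are 8 bytes apart
// and keyed on those bits (a runtime layout property).
uint64_t ArraySwitchCaseTarget(uint32_t ldr_register_insn, uint64_t entrypoint_base) {
  uint64_t field = (ldr_register_insn >> 16) & 0x3fu;  // index_reg + 32.
  uint64_t mask = uint64_t{0x3f} << 3;                 // Bits 3-8.
  return (entrypoint_base & ~mask) | (field << 3);
}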
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ arm64::Arm64Assembler assembler(&arena);
+
+ switch (key.GetType()) {
+ case ThunkType::kMethodCall: {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ break;
+ }
+ case ThunkType::kBakerReadBarrier: {
+ CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
+ break;
+ }
}
// Ensure we emit the literal pool.
@@ -489,22 +515,20 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
#undef __
-uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
- switch (type) {
+uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
+ switch (key.GetType()) {
case ThunkType::kMethodCall:
return kMaxMethodCallPositiveDisplacement;
- case ThunkType::kBakerReadBarrierField:
- case ThunkType::kBakerReadBarrierRoot:
+ case ThunkType::kBakerReadBarrier:
return kMaxBcondPositiveDisplacement;
}
}
-uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
- switch (type) {
+uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
+ switch (key.GetType()) {
case ThunkType::kMethodCall:
return kMaxMethodCallNegativeDisplacement;
- case ThunkType::kBakerReadBarrierField:
- case ThunkType::kBakerReadBarrierRoot:
+ case ThunkType::kBakerReadBarrier:
return kMaxBcondNegativeDisplacement;
}
}
@@ -543,10 +567,10 @@ bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,
return false;
}
- // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP
- // for an ADD immediate, check for that as well. We generalize a bit to include
- // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores
- // the result to a different register.
+ // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result
+ // of the ADRP for an ADD immediate, check for that as well. We generalize a bit
+ // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination
+ // or stores the result to a different register.
if ((next_insn & 0x1f000000) == 0x11000000 &&
((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
return false;
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 7887cea5e6..b00dd081b6 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -19,19 +19,19 @@
#include "base/array_ref.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "linker/arm/relative_patcher_arm_base.h"
namespace art {
+
+namespace arm64 {
+class Arm64Assembler;
+} // namespace arm64
+
namespace linker {
class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
public:
- enum class BakerReadBarrierKind : uint8_t {
- kField, // Field get or array get with constant offset (i.e. constant index).
- kGcRoot, // GC root load.
- kLast
- };
-
static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
CheckValidReg(base_reg);
CheckValidReg(holder_reg);
@@ -40,6 +40,13 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
BakerReadBarrierSecondRegField::Encode(holder_reg);
}
+ static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ }
+
static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
CheckValidReg(root_reg);
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
@@ -68,14 +75,20 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
uint32_t patch_offset) OVERRIDE;
protected:
- static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
-
- ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
- uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
- uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
+ uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE;
+ uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE;
private:
+ static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kLast = kGcRoot
+ };
+
static constexpr size_t kBitsForBakerReadBarrierKind =
MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
static constexpr size_t kBitsForRegister = 5u;
@@ -87,9 +100,11 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 30u && reg != 16u && reg != 17u);
+ DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg;
}
+ void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data);
+
static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp);
static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset,
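With kLast == kGcRoot == 2, MinimumBitsToStore(2) is 2, so the Baker custom value packs the kind into bits 0-1, the first register into bits 2-6 and the second register into bits 7-11 (31 == kInvalidEncodedReg marks an unused slot). A standalone sketch of that layout, independent of the BitField helpers and under the stated bit-width assumption:

#include <cstdint>

// Layout sketch of the Baker read barrier custom value, assuming the kind
// field needs 2 bits (MinimumBitsToStore(kGcRoot == 2) == 2) and each
// register field needs 5 bits, as declared above.
enum class Kind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };
constexpr uint32_t kInvalidReg = 31u;  // sp/zr, used to mark an unused slot.

constexpr uint32_t Encode(Kind kind, uint32_t first_reg, uint32_t second_reg) {
  return static_cast<uint32_t>(kind) | (first_reg << 2) | (second_reg << 7);
}

// EncodeBakerReadBarrierArrayData(base_reg = 5) would then produce:
static_assert(Encode(Kind::kArray, 5u, kInvalidReg) == (1u | (5u << 2) | (31u << 7)),
              "kind in bits 0-1, first reg in bits 2-6, second reg in bits 7-11");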
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b4d35ab2a7..5d02d449fe 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -18,6 +18,7 @@
#include "linker/relative_patcher_test.h"
#include "linker/arm64/relative_patcher_arm64.h"
#include "lock_word.h"
+#include "mirror/array-inl.h"
#include "mirror/object.h"
#include "oat_quick_method_header.h"
@@ -46,9 +47,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
static constexpr uint32_t kBlMinusMax = 0x96000000u;
- // LDR immediate, unsigned offset.
+ // LDR immediate, 32-bit, unsigned offset.
static constexpr uint32_t kLdrWInsn = 0xb9400000u;
+ // LDR register, 32-bit, LSL #2.
+ static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u;
+
+ // LDUR, 32-bit.
+ static constexpr uint32_t kLdurWInsn = 0xb8400000u;
+
// ADD/ADDS/SUB/SUBS immediate, 64-bit.
static constexpr uint32_t kAddXInsn = 0x91000000u;
static constexpr uint32_t kAddsXInsn = 0xb1000000u;
@@ -68,7 +75,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
// CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes.
- static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011;
+ static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;
void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
CHECK_LE(pos, code->size());
@@ -160,9 +167,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
}
std::vector<uint8_t> CompileMethodCallThunk() {
- ArmBaseRelativePatcher::ThunkKey key(
- ArmBaseRelativePatcher::ThunkType::kMethodCall,
- ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces)
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey();
return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -188,7 +193,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
std::vector<uint8_t> GenNops(size_t num_nops) {
std::vector<uint8_t> result;
- result.reserve(num_nops * 4u + 4u);
+ result.reserve(num_nops * 4u);
for (size_t i = 0; i != num_nops; ++i) {
PushBackInsn(&result, kNopInsn);
}
@@ -228,7 +233,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
} else {
LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
}
- uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
+ uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5,
// We take the sign bit from the disp, limiting disp to +- 2GiB.
@@ -244,12 +249,14 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn);
}
- void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
- dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+ void TestNopsAdrpLdr(size_t num_nops, uint32_t bss_begin, uint32_t string_entry_offset) {
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+ bss_begin_ = bss_begin;
auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched.
const LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset),
- LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
+ LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex),
+ LinkerPatch::StringBssEntryPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex),
};
AddCompiledMethod(MethodRef(1u),
ArrayRef<const uint8_t>(code),
@@ -257,7 +264,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
Link();
uint32_t method1_offset = GetMethodOffset(1u);
- uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+ uint32_t target_offset = bss_begin_ + string_entry_offset;
auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
@@ -288,14 +295,16 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
void PrepareNopsAdrpInsn2Ldr(size_t num_nops,
uint32_t insn2,
- uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
- dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+ uint32_t bss_begin,
+ uint32_t string_entry_offset) {
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+ bss_begin_ = bss_begin;
auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched.
InsertInsn(&code, num_nops * 4u + 4u, insn2);
const LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset),
- LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
+ LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex),
+ LinkerPatch::StringBssEntryPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex),
};
AddCompiledMethod(MethodRef(1u),
ArrayRef<const uint8_t>(code),
@@ -371,15 +380,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
void TestAdrpInsn2Ldr(uint32_t insn2,
uint32_t adrp_offset,
bool has_thunk,
- uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
+ uint32_t bss_begin,
+ uint32_t string_entry_offset) {
uint32_t method1_offset =
kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
ASSERT_LT(method1_offset, adrp_offset);
CHECK_ALIGNED(adrp_offset, 4u);
uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
- PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
- uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+ PrepareNopsAdrpInsn2Ldr(num_nops, insn2, bss_begin, string_entry_offset);
+ uint32_t target_offset = bss_begin_ + string_entry_offset;
if (has_thunk) {
TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn);
} else {
@@ -390,33 +399,33 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
void TestAdrpLdurLdr(uint32_t adrp_offset,
bool has_thunk,
- uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
- TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+ uint32_t bss_begin,
+ uint32_t string_entry_offset) {
+ TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, bss_begin, string_entry_offset);
}
void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn,
int32_t pcrel_disp,
uint32_t adrp_offset,
bool has_thunk,
- uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
+ uint32_t bss_begin,
+ uint32_t string_entry_offset) {
ASSERT_LT(pcrel_disp, 0x100000);
ASSERT_GE(pcrel_disp, -0x100000);
ASSERT_EQ(pcrel_disp & 0x3, 0);
uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5);
- TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+ TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset);
}
void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn,
uint32_t sprel_disp_in_load_units,
uint32_t adrp_offset,
bool has_thunk,
- uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
+ uint32_t bss_begin,
+ uint32_t string_entry_offset) {
ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
- TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+ TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset);
}
void TestAdrpInsn2Add(uint32_t insn2,
@@ -466,17 +475,22 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
- auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
- ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
- return patcher->CompileThunk(key);
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+ return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
+ }
+
+ std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+ return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
- auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
- ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
- return patcher->CompileThunk(key);
+ ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
+ return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
uint32_t GetOutputInsn(uint32_t offset) {
@@ -488,7 +502,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest {
(static_cast<uint32_t>(output_[offset + 3]) << 24);
}
- void TestBakerField(uint32_t offset, uint32_t root_reg);
+ void TestBakerField(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -716,19 +730,19 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) {
EXPECT_TRUE(CheckThunk(thunk_offset));
}
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry1) {
TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u);
}
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry2) {
TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u);
}
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry3) {
TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu);
}
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry4) {
TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u);
}
@@ -753,7 +767,7 @@ TEST_F(Arm64RelativePatcherTestDefault, StringReference4) {
test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2)
#define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \
- TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+ TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## Ldur ## disp) { \
bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \
TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \
}
@@ -761,7 +775,7 @@ TEST_F(Arm64RelativePatcherTestDefault, StringReference4) {
TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238)
#define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \
- TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+ TEST_F(Arm64RelativePatcherTestDenver64, StringBssEntry ## adrp_offset ## Ldur ## disp) { \
TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \
}
@@ -769,7 +783,7 @@ TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238)
// LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
#define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \
- TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \
+ TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WPcRel ## disp) { \
TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \
}
@@ -777,7 +791,7 @@ TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238)
// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
#define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \
- TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \
+ TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XPcRel ## disp) { \
bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \
bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \
TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \
@@ -787,14 +801,14 @@ TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238)
// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \
- TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \
+ TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WSpRel ## disp) { \
TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \
}
TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4)
#define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \
- TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \
+ TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XSpRel ## disp) { \
TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \
}
@@ -885,7 +899,7 @@ TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4)
TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
-void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) {
+void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
@@ -899,7 +913,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg
uint32_t method_idx = 0u;
for (uint32_t base_reg : valid_regs) {
for (uint32_t holder_reg : valid_regs) {
- uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+ uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
ArrayRef<const uint8_t> code(raw_code);
@@ -922,7 +936,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg
++method_idx;
uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
- uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+ uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr});
ASSERT_EQ(kMethodCodeSize, expected_code.size());
ASSERT_TRUE(
@@ -942,7 +956,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg
if (holder_reg == base_reg) {
// Verify that the null-check CBZ uses the correct register, i.e. holder_reg.
ASSERT_GE(output_.size() - gray_check_offset, 4u);
- ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+ ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
gray_check_offset +=4u;
}
// Verify that the lock word for gray bit check is loaded from the holder address.
@@ -955,12 +969,12 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg
/* ip0 */ 16;
EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset));
// Verify the gray bit check.
- const uint32_t check_gray_bit_witout_offset =
- 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
- EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f);
+ const uint32_t check_gray_bit_without_offset =
+ 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+ EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu);
// Verify the fake dependency.
const uint32_t fake_dependency =
- 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32
+ 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32
(/* ip0 */ 16 << 16) | // Xm = ip0
(base_reg << 5) | // Xn = base_reg
base_reg; // Xd = base_reg
@@ -973,19 +987,19 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg
}
}
-#define TEST_BAKER_FIELD(offset, root_reg) \
+#define TEST_BAKER_FIELD(offset, ref_reg) \
TEST_F(Arm64RelativePatcherTestDefault, \
- BakerOffset##offset##_##root_reg) { \
- TestBakerField(offset, root_reg); \
+ BakerOffset##offset##_##ref_reg) { \
+ TestBakerField(offset, ref_reg); \
}
-TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15)
-TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29)
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15)
+TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29)
TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
// One thunk in the middle with maximum distance branches to it from both sides.
- // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
constexpr uint32_t kLiteralOffset1 = 4;
const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
@@ -1046,7 +1060,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
// Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction
// earlier, so the thunk is emitted before the filler.
- // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
constexpr uint32_t kLiteralOffset1 = 0;
const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn});
ArrayRef<const uint8_t> code1(raw_code1);
@@ -1076,7 +1090,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
// Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded
// by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
- // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
constexpr uint32_t kLiteralOffset1 = 4;
const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
@@ -1132,7 +1146,88 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFr
ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
}
-TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) {
+TEST_F(Arm64RelativePatcherTestDefault, BakerArray) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ // LR and SP/ZR are reserved.
+ };
+ auto ldr = [](uint32_t base_reg) {
+ uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+ uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
+ return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg;
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+ uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+ const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the lock word for the gray bit check is loaded from the correct
+ // address, i.e. at a negative offset from base_reg, which points to the array data.
+ static constexpr size_t kGrayCheckInsns = 5;
+ ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns);
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+ ASSERT_LT(offset, 0);
+ const uint32_t load_lock_word =
+ kLdurWInsn |
+ ((offset & 0x1ffu) << 12) |
+ (base_reg << 5) |
+ /* ip0 */ 16;
+ EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset));
+ // Verify the gray bit check.
+ const uint32_t check_gray_bit_without_offset =
+ 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+ EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu);
+ // Verify the fake dependency.
+ const uint32_t fake_dependency =
+ 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32
+ (/* ip0 */ 16 << 16) | // Xm = ip0
+ (base_reg << 5) | // Xn = base_reg
+ base_reg; // Xd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+ }
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
@@ -1180,7 +1275,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) {
// Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
ASSERT_GE(output_.size() - thunk_offset, 4u);
- ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+ ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
// Do not check the rest of the implementation.
// The next thunk follows on the next aligned offset.
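The expected CBNZ instructions in these tests are composed by shifting the byte offset to the thunk into the imm19 field: the offset is counted in 4-byte units and placed at bit 5, which is what the `<< (5 - 2)` expresses. A minimal sketch of the same composition, reusing only the base encoding declared in the test fixture:

#include <cstdint>

// CBNZ x17, +0 with bits 5-23 reserved for the branch offset in units of
// 4 bytes, as declared in the test fixture above.
constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;

// Shifting the byte offset left by (5 - 2) places offset/4 at bit 5,
// provided the offset is a multiple of 4 (instructions are 4-byte aligned).
constexpr uint32_t CbnzIP1WithByteOffset(uint32_t byte_offset) {
  return kCbnzIP1Plus0Insn | (byte_offset << (5 - 2));
}

static_assert(CbnzIP1WithByteOffset(8u) == (kCbnzIP1Plus0Insn | ((8u / 4u) << 5)),
              "equivalent compositions for 4-byte-aligned offsets");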
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
index 474eb73e08..63ad8a58d5 100644
--- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -37,7 +37,7 @@ class Mips32r6RelativePatcherTest : public RelativePatcherTest {
}
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
- void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+ void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
void TestStringReference(uint32_t string_offset);
};
@@ -69,14 +69,15 @@ void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Link
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
-void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
- dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void Mips32r6RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+ uint32_t string_entry_offset) {
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+ bss_begin_ = bss_begin;
LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset)
+ LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex)
};
- CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
- dex_cache_arrays_begin_ + element_offset);
+ CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
}
void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
@@ -88,8 +89,8 @@ void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
}
-TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
- TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+TEST_F(Mips32r6RelativePatcherTest, StringBssEntry) {
+ TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234);
}
TEST_F(Mips32r6RelativePatcherTest, StringReference) {
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
index b0d1294cf4..961b31266f 100644
--- a/compiler/linker/mips/relative_patcher_mips_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -20,10 +20,6 @@
namespace art {
namespace linker {
-// We'll maximize the range of a single load instruction for dex cache array accesses
-// by aligning offset -32768 with the offset of the first used element.
-static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
-
class MipsRelativePatcherTest : public RelativePatcherTest {
public:
MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
@@ -41,7 +37,7 @@ class MipsRelativePatcherTest : public RelativePatcherTest {
}
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
- void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+ void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
void TestStringReference(uint32_t string_offset);
};
@@ -65,9 +61,7 @@ void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPa
ASSERT_TRUE(result.first);
uint32_t diff = target_offset - (result.second + kAnchorOffset);
- if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
- diff += kDexCacheArrayLwOffset;
- }
+ CHECK_NE(patches[0].GetType(), LinkerPatch::Type::kDexCacheArray);
diff += (diff & 0x8000) << 1; // Account for sign extension in addiu.
const uint8_t expected_code[] = {
@@ -79,14 +73,15 @@ void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPa
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
-void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
- dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void MipsRelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+ uint32_t string_entry_offset) {
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+ bss_begin_ = bss_begin;
LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset)
+ LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex)
};
- CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
- dex_cache_arrays_begin_ + element_offset);
+ CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
}
void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
@@ -98,8 +93,8 @@ void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
}
-TEST_F(MipsRelativePatcherTest, DexCacheReference) {
- TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+TEST_F(MipsRelativePatcherTest, StringBssEntry) {
+ TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234);
}
TEST_F(MipsRelativePatcherTest, StringReference) {
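The `diff += (diff & 0x8000) << 1` adjustment retained above compensates for sign extension: when bit 15 of the offset is set, the sign-extending low-half add (addiu) effectively subtracts 0x10000, so the high half must be pre-incremented by one. A small self-contained sketch of that hi/lo split, under the usual assumption that the low half is consumed by a sign-extending 16-bit add:

#include <cstdint>

// High half, pre-incremented whenever bit 15 of the offset is set so that
// the sign-extended low half lands on the intended address.
constexpr uint32_t HighHalf(uint32_t diff) {
  return (diff + ((diff & 0x8000u) << 1)) >> 16;
}
constexpr uint32_t LowHalf(uint32_t diff) { return diff & 0xffffu; }

// Sign-extend a 16-bit value, as addiu does with its immediate.
constexpr int32_t SignExtend16(uint32_t lo) {
  return static_cast<int32_t>(lo) - ((lo & 0x8000u) != 0u ? 0x10000 : 0);
}

static_assert(static_cast<int32_t>(HighHalf(0x00018000u) << 16) +
                  SignExtend16(LowHalf(0x00018000u)) == 0x00018000,
              "pre-incremented high half compensates for addiu sign extension");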
diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc
index c3170584e4..9c9e24a96e 100644
--- a/compiler/linker/mips64/relative_patcher_mips64_test.cc
+++ b/compiler/linker/mips64/relative_patcher_mips64_test.cc
@@ -39,7 +39,7 @@ class Mips64RelativePatcherTest : public RelativePatcherTest {
}
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
- void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+ void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
void TestStringReference(uint32_t string_offset);
};
@@ -76,18 +76,19 @@ void Mips64RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
-void Mips64RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
- uint32_t element_offset) {
- dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void Mips64RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+ uint32_t string_entry_offset) {
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+ bss_begin_ = bss_begin;
LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset)
+ LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex)
};
- CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
- dex_cache_arrays_begin_ + element_offset);
+ CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
}
-TEST_F(Mips64RelativePatcherTest, DexCacheReference) {
- TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+TEST_F(Mips64RelativePatcherTest, StringBssEntry) {
+ TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234);
}
TEST_F(Mips64RelativePatcherTest, CallOther) {
diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h
index 247b29017e..bdc1ee1d27 100644
--- a/compiler/linker/multi_oat_relative_patcher.h
+++ b/compiler/linker/multi_oat_relative_patcher.h
@@ -102,7 +102,7 @@ class MultiOatRelativePatcher FINAL {
relative_patcher_->PatchCall(code, literal_offset, patch_offset, target_offset);
}
- // Wrapper around RelativePatcher::PatchDexCacheReference(), doing offset adjustment.
+ // Wrapper around RelativePatcher::PatchPcRelativeReference(), doing offset adjustment.
void PatchPcRelativeReference(std::vector<uint8_t>* code,
const LinkerPatch& patch,
uint32_t patch_offset,
diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc
index 951588a857..615b2b97be 100644
--- a/compiler/linker/multi_oat_relative_patcher_test.cc
+++ b/compiler/linker/multi_oat_relative_patcher_test.cc
@@ -282,7 +282,7 @@ TEST_F(MultiOatRelativePatcherTest, Patch) {
uint32_t method2_patch_offset = 0x7654u;
uint32_t method2_target_offset = 0xccccu;
LinkerPatch method2_patch =
- LinkerPatch::DexCacheArrayPatch(method2_literal_offset, nullptr, 0u, 1234u);
+ LinkerPatch::StringBssEntryPatch(method2_literal_offset, nullptr, 0u, 1u);
patcher_.PatchPcRelativeReference(
&code, method2_patch, method2_patch_offset, method2_target_offset);
DCHECK_EQ(method2_literal_offset, mock_->last_literal_offset_);
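The comment change above describes MultiOatRelativePatcher::PatchPcRelativeReference() as a thin wrapper that applies an offset adjustment before delegating to the architecture-specific patcher, and the test checks what the mock patcher observes. A rough sketch of that wrapper pattern, assuming a simplified delegate interface and that both the patch offset and the target offset are shifted by the same adjustment (class and field names here are illustrative, not the actual ART API):

    #include <cstdint>
    #include <vector>

    struct LinkerPatchStub { uint32_t literal_offset; };  // Illustrative stand-in.

    class DelegatePatcher {
     public:
      virtual ~DelegatePatcher() = default;
      virtual void PatchPcRelativeReference(std::vector<uint8_t>* code,
                                            const LinkerPatchStub& patch,
                                            uint32_t patch_offset,
                                            uint32_t target_offset) = 0;
    };

    class RecordingPatcher : public DelegatePatcher {
     public:
      void PatchPcRelativeReference(std::vector<uint8_t>* /* code */,
                                    const LinkerPatchStub& /* patch */,
                                    uint32_t patch_offset,
                                    uint32_t target_offset) override {
        last_patch_offset_ = patch_offset;
        last_target_offset_ = target_offset;
      }
      uint32_t last_patch_offset_ = 0u;
      uint32_t last_target_offset_ = 0u;
    };

    // The wrapper shifts offsets from "current oat file" space into the combined
    // multi-oat space before delegating -- the "offset adjustment" in the comment.
    class MultiOatWrapper {
     public:
      MultiOatWrapper(DelegatePatcher* delegate, uint32_t adjustment)
          : delegate_(delegate), adjustment_(adjustment) {}

      void PatchPcRelativeReference(std::vector<uint8_t>* code,
                                    const LinkerPatchStub& patch,
                                    uint32_t patch_offset,
                                    uint32_t target_offset) {
        delegate_->PatchPcRelativeReference(
            code, patch, patch_offset + adjustment_, target_offset + adjustment_);
      }

     private:
      DelegatePatcher* delegate_;
      uint32_t adjustment_;
    };

    int main() {
      RecordingPatcher mock;
      MultiOatWrapper wrapper(&mock, /* adjustment */ 0x1000u);
      std::vector<uint8_t> code(16u);
      wrapper.PatchPcRelativeReference(&code, LinkerPatchStub{8u}, 0x7654u, 0xccccu);
      return (mock.last_patch_offset_ == 0x8654u && mock.last_target_offset_ == 0xdcccu) ? 0 : 1;
    }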
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index d9a87a0cfd..bff68080c6 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -31,6 +31,7 @@
#include "method_reference.h"
#include "oat.h"
#include "oat_quick_method_header.h"
+#include "string_reference.h"
#include "vector_output_stream.h"
namespace art {
@@ -61,7 +62,7 @@ class RelativePatcherTest : public testing::Test {
features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
method_offset_map_(),
patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)),
- dex_cache_arrays_begin_(0u),
+ bss_begin_(0u),
compiled_method_refs_(),
compiled_methods_(),
patched_code_(),
@@ -157,8 +158,9 @@ class RelativePatcherTest : public testing::Test {
result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta();
patcher_->PatchCall(&patched_code_, patch.LiteralOffset(),
offset + patch.LiteralOffset(), target_offset);
- } else if (patch.GetType() == LinkerPatch::Type::kDexCacheArray) {
- uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset();
+ } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
+ uint32_t target_offset =
+ bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_);
patcher_->PatchPcRelativeReference(&patched_code_,
patch,
offset + patch.LiteralOffset(),
@@ -276,7 +278,7 @@ class RelativePatcherTest : public testing::Test {
std::unique_ptr<const InstructionSetFeatures> features_;
MethodOffsetMap method_offset_map_;
std::unique_ptr<RelativePatcher> patcher_;
- uint32_t dex_cache_arrays_begin_;
+ uint32_t bss_begin_;
SafeMap<uint32_t, uint32_t> string_index_to_offset_map_;
std::vector<MethodReference> compiled_method_refs_;
std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
index 2a44b7990e..0bd9de8e15 100644
--- a/compiler/linker/x86/relative_patcher_x86_test.cc
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -107,9 +107,11 @@ TEST_F(X86RelativePatcherTest, CallTrampoline) {
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
-TEST_F(X86RelativePatcherTest, DexCacheReference) {
- dex_cache_arrays_begin_ = 0x12345678;
- constexpr size_t kElementOffset = 0x1234;
+TEST_F(X86RelativePatcherTest, StringBssEntry) {
+ bss_begin_ = 0x12345678;
+ constexpr size_t kStringEntryOffset = 0x1234;
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset);
static const uint8_t raw_code[] = {
0xe8, 0x00, 0x00, 0x00, 0x00, // call +0
0x5b, // pop ebx
@@ -118,15 +120,14 @@ TEST_F(X86RelativePatcherTest, DexCacheReference) {
constexpr uint32_t anchor_offset = 5u; // After call +0.
ArrayRef<const uint8_t> code(raw_code);
LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset),
+ LinkerPatch::StringBssEntryPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex),
};
AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
Link();
auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
ASSERT_TRUE(result.first);
- uint32_t diff =
- dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset);
+ uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + anchor_offset);
static const uint8_t expected_code[] = {
0xe8, 0x00, 0x00, 0x00, 0x00, // call +0
0x5b, // pop ebx
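In 32-bit x86 there is no PC-relative data addressing, so the test code first materializes an anchor with "call +0; pop ebx", and the patcher then stores bss_begin_ + kStringEntryOffset - (method_offset + anchor_offset) as the 32-bit displacement of the following load. A small sketch of how such a displacement would be written into a code buffer (plain C++; the method offset and the exact placeholder instruction bytes are made up for illustration, not taken from the test):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Write a 32-bit little-endian displacement at literal_offset.
    void WriteDisplacement(std::vector<uint8_t>* code, size_t literal_offset, uint32_t diff) {
      (*code)[literal_offset + 0] = static_cast<uint8_t>(diff);
      (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8);
      (*code)[literal_offset + 2] = static_cast<uint8_t>(diff >> 16);
      (*code)[literal_offset + 3] = static_cast<uint8_t>(diff >> 24);
    }

    int main() {
      const uint32_t bss_begin = 0x12345678u;
      const uint32_t string_entry_offset = 0x1234u;
      const uint32_t method_offset = 0x100u;  // Hypothetical offset of the linked method.
      const uint32_t anchor_offset = 5u;      // After "call +0".
      std::vector<uint8_t> code = {
          0xe8, 0x00, 0x00, 0x00, 0x00,        // call +0
          0x5b,                                // pop ebx
          0x8b, 0x83, 0x00, 0x00, 0x00, 0x00,  // mov eax, [ebx + disp32] (placeholder)
      };
      uint32_t diff = bss_begin + string_entry_offset - (method_offset + anchor_offset);
      WriteDisplacement(&code, code.size() - 4u, diff);
      assert(code[code.size() - 4u] == static_cast<uint8_t>(diff));
      return 0;
    }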
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
index 2b46453255..6d6bb40fb4 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
@@ -127,19 +127,20 @@ TEST_F(X86_64RelativePatcherTest, CallTrampoline) {
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
-TEST_F(X86_64RelativePatcherTest, DexCacheReference) {
- dex_cache_arrays_begin_ = 0x12345678;
- constexpr size_t kElementOffset = 0x1234;
+TEST_F(X86_64RelativePatcherTest, StringBssEntry) {
+ bss_begin_ = 0x12345678;
+ constexpr size_t kStringEntryOffset = 0x1234;
+ constexpr uint32_t kStringIndex = 1u;
+ string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset);
LinkerPatch patches[] = {
- LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset),
+ LinkerPatch::StringBssEntryPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kStringIndex),
};
AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches));
Link();
auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
ASSERT_TRUE(result.first);
- uint32_t diff =
- dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size());
+ uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + kDexCacheLoadCode.size());
static const uint8_t expected_code[] = {
0x8b, 0x05,
static_cast<uint8_t>(diff),
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 6b5387ae19..fed2d34cdb 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -28,13 +28,12 @@
#include "base/stl_util.h"
#include "base/unix_file/fd_file.h"
#include "class_linker.h"
-#include "compiled_class.h"
#include "compiled_method.h"
#include "debug/method_debug_info.h"
#include "dex/verification_results.h"
#include "dex_file-inl.h"
#include "dexlayout.h"
-#include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
#include "driver/compiler_options.h"
#include "gc/space/image_space.h"
#include "gc/space/space.h"
@@ -712,17 +711,17 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor {
bool EndClass() {
ClassReference class_ref(dex_file_, class_def_index_);
- CompiledClass* compiled_class = writer_->compiler_driver_->GetCompiledClass(class_ref);
mirror::Class::Status status;
- if (compiled_class != nullptr) {
- status = compiled_class->GetStatus();
- } else if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
- // The oat class status is used only for verification of resolved classes,
- // so use kStatusErrorResolved whether the class was resolved or unresolved
- // during compile-time verification.
- status = mirror::Class::kStatusErrorResolved;
- } else {
- status = mirror::Class::kStatusNotReady;
+ bool found = writer_->compiler_driver_->GetCompiledClass(class_ref, &status);
+ if (!found) {
+ if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
+ // The oat class status is used only for verification of resolved classes,
+ // so use kStatusErrorResolved whether the class was resolved or unresolved
+ // during compile-time verification.
+ status = mirror::Class::kStatusErrorResolved;
+ } else {
+ status = mirror::Class::kStatusNotReady;
+ }
}
writer_->oat_classes_.emplace_back(offset_,
@@ -1332,19 +1331,12 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
PatchCodeAddress(&patched_code_, literal_offset, target_offset);
break;
}
- case LinkerPatch::Type::kMethod: {
- ArtMethod* method = GetTargetMethod(patch);
- PatchMethodAddress(&patched_code_, literal_offset, method);
- break;
- }
- case LinkerPatch::Type::kString: {
- mirror::String* string = GetTargetString(patch);
- PatchObjectAddress(&patched_code_, literal_offset, string);
- break;
- }
- case LinkerPatch::Type::kType: {
- mirror::Class* type = GetTargetType(patch);
- PatchObjectAddress(&patched_code_, literal_offset, type);
+ case LinkerPatch::Type::kMethodRelative: {
+ uint32_t target_offset = GetTargetMethodOffset(GetTargetMethod(patch));
+ writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+ patch,
+ offset_ + literal_offset,
+ target_offset);
break;
}
case LinkerPatch::Type::kBakerReadBarrierBranch: {
@@ -1468,6 +1460,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
}
}
+ uint32_t GetTargetMethodOffset(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(writer_->HasBootImage());
+ method = writer_->image_writer_->GetImageMethodAddress(method);
+ size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_);
+ uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index);
+ // TODO: Clean up offset types. The target offset must be treated as signed.
+ return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method) - oat_data_begin);
+ }
+
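The new GetTargetMethodOffset() returns the boot-image address of the method relative to the start of the oat data section; as the TODO notes, the delta can be negative, and the cast to uint32_t relies on two's-complement wrap-around so that a patcher treating the value as signed recovers the original address. A tiny standalone illustration of that round trip (the addresses below are made up):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical addresses: the method lives below the oat data section.
      uintptr_t method_address = 0x70000000u;
      uintptr_t oat_data_begin = 0x70100000u;

      // Same shape of computation as GetTargetMethodOffset(): a negative delta
      // stored in a uint32_t.
      uint32_t target_offset = static_cast<uint32_t>(method_address - oat_data_begin);

      // A consumer that treats the offset as signed gets the original address back.
      int32_t signed_delta = static_cast<int32_t>(target_offset);
      assert(oat_data_begin + signed_delta == method_address);
      return 0;
    }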
uint32_t GetTargetObjectOffset(mirror::Object* object) REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(writer_->HasBootImage());
object = writer_->image_writer_->GetImageAddress(object);
@@ -1497,34 +1498,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
data[3] = (address >> 24) & 0xffu;
}
- void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- if (writer_->HasBootImage()) {
- method = writer_->image_writer_->GetImageMethodAddress(method);
- } else if (kIsDebugBuild) {
- // NOTE: We're using linker patches for app->boot references when the image can
- // be relocated and therefore we need to emit .oat_patches. We're not using this
- // for app->app references, so check that the method is an image method.
- std::vector<gc::space::ImageSpace*> image_spaces =
- Runtime::Current()->GetHeap()->GetBootImageSpaces();
- bool contains_method = false;
- for (gc::space::ImageSpace* image_space : image_spaces) {
- size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
- contains_method |=
- image_space->GetImageHeader().GetMethodsSection().Contains(method_offset);
- }
- CHECK(contains_method);
- }
- // Note: We only patch targeting ArtMethods in image which is in the low 4gb.
- uint32_t address = PointerToLowMemUInt32(method);
- DCHECK_LE(offset + 4, code->size());
- uint8_t* data = &(*code)[offset];
- data[0] = address & 0xffu;
- data[1] = (address >> 8) & 0xffu;
- data[2] = (address >> 16) & 0xffu;
- data[3] = (address >> 24) & 0xffu;
- }
-
void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
REQUIRES_SHARED(Locks::mutator_lock_) {
uint32_t address = target_offset;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index e778f75551..66b70ade2e 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -31,7 +31,7 @@
#include "os.h"
#include "safe_map.h"
#include "string_reference.h"
-#include "utils/type_reference.h"
+#include "type_reference.h"
namespace art {
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 5e70a8284d..1e75f10ebe 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -310,16 +310,18 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// least one predecessor is not covered by the same TryItem as the try block.
// We do not split each edge separately, but rather create one boundary block
// that all predecessors are relinked to. This preserves loop headers (b/23895756).
- for (auto entry : try_block_info) {
- HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+ for (const auto& entry : try_block_info) {
+ uint32_t block_id = entry.first;
+ const DexFile::TryItem* try_item = entry.second;
+ HBasicBlock* try_block = graph_->GetBlocks()[block_id];
for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
- if (GetTryItem(predecessor, try_block_info) != entry.second) {
+ if (GetTryItem(predecessor, try_block_info) != try_item) {
// Found a predecessor not covered by the same TryItem. Insert entering
// boundary block.
HTryBoundary* try_entry =
new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
try_block->CreateImmediateDominator()->AddInstruction(try_entry);
- LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+ LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks);
break;
}
}
@@ -327,8 +329,10 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// Do a second pass over the try blocks and insert exit TryBoundaries where
// the successor is not in the same TryItem.
- for (auto entry : try_block_info) {
- HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+ for (const auto& entry : try_block_info) {
+ uint32_t block_id = entry.first;
+ const DexFile::TryItem* try_item = entry.second;
+ HBasicBlock* try_block = graph_->GetBlocks()[block_id];
// NOTE: Do not use iterators because SplitEdge would invalidate them.
for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
HBasicBlock* successor = try_block->GetSuccessors()[i];
@@ -337,7 +341,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// covered by the same TryItem. Otherwise the previous pass would have
// created a non-throwing boundary block.
if (GetTryItem(successor, try_block_info) != nullptr) {
- DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+ DCHECK_EQ(try_item, GetTryItem(successor, try_block_info));
continue;
}
@@ -345,7 +349,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
HTryBoundary* try_exit =
new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
- LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+ LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks);
}
}
}
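The loop changes above iterate try_block_info by const reference and give the pair members names, which avoids copying every map entry on each iteration while keeping the body readable. A generic illustration of the pattern (std::map stands in for the arena-backed map used in the builder):

    #include <cstdint>
    #include <map>

    struct TryItemStub { uint32_t handler_off; };  // Stand-in for DexFile::TryItem.

    void Walk(const std::map<uint32_t, const TryItemStub*>& try_block_info) {
      // "auto entry" would copy the stored std::pair on every iteration; a const
      // reference binds to it directly, and naming the members documents the loop.
      for (const auto& entry : try_block_info) {
        uint32_t block_id = entry.first;
        const TryItemStub* try_item = entry.second;
        (void)block_id;
        (void)try_item;
      }
    }

    int main() {
      TryItemStub item{42u};
      std::map<uint32_t, const TryItemStub*> info{{1u, &item}};
      Walk(info);
      return 0;
    }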
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index ed630cda91..f3ecdf036a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1734,8 +1734,8 @@ class BCEVisitor : public HGraphVisitor {
*/
void InsertPhiNodes() {
// Scan all new deoptimization blocks.
- for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
- HBasicBlock* true_block = it1->second;
+ for (const auto& entry : taken_test_loop_) {
+ HBasicBlock* true_block = entry.second;
HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
// Scan all instructions in a new deoptimization block.
for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index cb6e14b2bd..a949c33149 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public testing::Test {
void RunBCE() {
graph_->BuildDominatorTree();
- InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
+ InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run();
SideEffectsAnalysis side_effects(graph_);
side_effects.Run();
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5136d7d2b8..c918ee6687 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -58,7 +58,7 @@
#include "parallel_move_resolver.h"
#include "ssa_liveness_analysis.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils/assembler.h"
namespace art {
@@ -145,7 +145,7 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
}
size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
- auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
return static_cast<size_t>(pointer_size) * index;
}
@@ -508,7 +508,7 @@ void CodeGenerator::GenerateUnresolvedFieldAccess(
void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
Location runtime_type_index_location,
Location runtime_return_location) {
- DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+ DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
DCHECK_EQ(cls->InputCount(), 1u);
LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary(
cls, LocationSummary::kCallOnMainOnly);
@@ -518,7 +518,7 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
}
void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
- DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+ DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
LocationSummary* locations = cls->GetLocations();
MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
if (cls->NeedsAccessCheck()) {
@@ -557,6 +557,9 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
}
void CodeGenerator::AllocateLocations(HInstruction* instruction) {
+ for (HEnvironment* env = instruction->GetEnvironment(); env != nullptr; env = env->GetParent()) {
+ env->AllocateLocations();
+ }
instruction->Accept(GetLocationBuilder());
DCHECK(CheckTypeConsistency(instruction));
LocationSummary* locations = instruction->GetLocations();
@@ -1400,20 +1403,6 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
}
-uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
- ScopedObjectAccess soa(Thread::Current());
- mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
- DCHECK(klass->IsInitialized());
- return klass->GetSlowPathFlagOffset().Uint32Value();
-}
-
-uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
- ScopedObjectAccess soa(Thread::Current());
- mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
- DCHECK(klass->IsInitialized());
- return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
-}
-
void CodeGenerator::EmitJitRoots(uint8_t* code,
Handle<mirror::ObjectArray<mirror::Object>> roots,
const uint8_t* roots_data) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ea463eeb62..c9ba5c3357 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -31,10 +31,11 @@
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
+#include "stack.h"
#include "stack_map_stream.h"
#include "string_reference.h"
+#include "type_reference.h"
#include "utils/label.h"
-#include "utils/type_reference.h"
namespace art {
@@ -541,7 +542,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case HLoadString::LoadKind::kBssEntry:
DCHECK(load->NeedsEnvironment());
return LocationSummary::kCallOnSlowPath;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kRuntimeCall:
DCHECK(load->NeedsEnvironment());
return LocationSummary::kCallOnMainOnly;
case HLoadString::LoadKind::kJitTableAddress:
@@ -572,9 +573,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual void GenerateNop() = 0;
- uint32_t GetReferenceSlowFlagOffset() const;
- uint32_t GetReferenceDisableFlagOffset() const;
-
static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
protected:
@@ -842,7 +840,7 @@ class SlowPathGenerator {
const uint32_t dex_pc = instruction->GetDexPc();
auto iter = slow_path_map_.find(dex_pc);
if (iter != slow_path_map_.end()) {
- auto candidates = iter->second;
+ const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
for (const auto& it : candidates) {
InstructionType* other_instruction = it.first;
SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
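The SlowPathGenerator change replaces "auto candidates = iter->second;" with a const reference to the stored vector: with plain auto, the whole vector of (instruction, slow path) pairs was copied just to be scanned once. A minimal demonstration of that pitfall (standard containers stand in for the arena-backed ones):

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main() {
      std::unordered_map<uint32_t, std::vector<std::pair<int, int>>> slow_path_map;
      slow_path_map[7u] = {{1, 10}, {2, 20}};

      auto iter = slow_path_map.find(7u);
      assert(iter != slow_path_map.end());

      auto copied = iter->second;             // Deep copy of the vector.
      const auto& referenced = iter->second;  // No copy; aliases the stored vector.

      copied.emplace_back(3, 30);             // Does not affect the map's data...
      assert(iter->second.size() == 2u);
      assert(&referenced == &iter->second);   // ...while the reference is the map's own storage.
      return 0;
    }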
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 419b2afe91..914ae177c4 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm.h"
+#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -25,6 +26,7 @@
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm.h"
+#include "linker/arm/relative_patcher_thumb2.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -60,10 +62,45 @@ static constexpr DRegister DTMP = D31;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
+// offset < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const Register kBakerCcEntrypointRegister = R4;
+
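kReferenceLoadMinFarOffset reflects the 4 KiB range of the immediate-offset LDR used for reference loads: smaller offsets fit in a single instruction that the link-time Baker thunk can work with, while larger ones must be split into a base-register adjustment plus a small residual offset. A rough sketch of that split, covering only the arithmetic rather than the actual code generation (the masking strategy is an assumption for illustration):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kReferenceLoadMinFarOffset = 4u * 1024u;

    // Split a field offset into a base adjustment (folded into an ADD on the base
    // register) and a residual offset small enough for a single-instruction LDR.
    struct SplitOffset {
      uint32_t base_adjustment;
      uint32_t load_offset;
    };

    SplitOffset SplitFarOffset(uint32_t offset) {
      uint32_t base_adjustment = offset & ~(kReferenceLoadMinFarOffset - 1u);
      return SplitOffset{base_adjustment, offset - base_adjustment};
    }

    int main() {
      SplitOffset s = SplitFarOffset(0x1234u);  // 4660 bytes: too far for one LDR.
      assert(s.base_adjustment == 0x1000u);
      assert(s.load_offset == 0x234u);
      assert(s.load_offset < kReferenceLoadMinFarOffset);
      assert(s.base_adjustment + s.load_offset == 0x1234u);
      return 0;
    }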
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
+static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) {
+ DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK_EQ(kBakerCcEntrypointRegister,
+ instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>());
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
+ __ BindTrackedLabel(bne_label);
+ Label placeholder_label;
+ __ b(&placeholder_label, NE); // Placeholder, patched at link-time.
+ __ Bind(&placeholder_label);
+}
+
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+ return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -824,7 +861,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP
// Baker's read barriers, we need to perform the load of
// mirror::Object::monitor_ *before* the original reference load.
// This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // The slow path (for Baker's algorithm) should look like:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -959,6 +996,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
__ Bind(GetEntryLabel());
+ // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+
// /* int32_t */ monitor = obj->monitor_
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
__ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
@@ -1607,6 +1656,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) {
}
}
+static int64_t AdjustConstantForCondition(int64_t value,
+ IfCondition* condition,
+ IfCondition* opposite) {
+ if (value == 1) {
+ if (*condition == kCondB) {
+ value = 0;
+ *condition = kCondEQ;
+ *opposite = kCondNE;
+ } else if (*condition == kCondAE) {
+ value = 0;
+ *condition = kCondNE;
+ *opposite = kCondEQ;
+ }
+ } else if (value == -1) {
+ if (*condition == kCondGT) {
+ value = 0;
+ *condition = kCondGE;
+ *opposite = kCondLT;
+ } else if (*condition == kCondLE) {
+ value = 0;
+ *condition = kCondLT;
+ *opposite = kCondGE;
+ }
+ }
+
+ return value;
+}
+
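AdjustConstantForCondition() rewrites comparisons against +1 and -1 as comparisons against zero, using the identities x < 1 iff x == 0 and x >= 1 iff x != 0 for unsigned x, and x > -1 iff x >= 0 and x <= -1 iff x < 0 for signed x, so the zero special cases below apply more often. A tiny self-check of those identities:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t x = -2; x <= 2; ++x) {
        uint64_t ux = static_cast<uint64_t>(x);
        // Unsigned: B/AE against 1 become EQ/NE against 0.
        assert((ux < 1u) == (ux == 0u));
        assert((ux >= 1u) == (ux != 0u));
        // Signed: GT/LE against -1 become GE/LT against 0.
        assert((x > -1) == (x >= 0));
        assert((x <= -1) == (x < 0));
      }
      return 0;
    }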
static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
bool invert,
CodeGeneratorARM* codegen) {
@@ -1620,7 +1697,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond
std::swap(cond, opposite);
}
- std::pair<Condition, Condition> ret;
+ std::pair<Condition, Condition> ret(EQ, NE);
const Location left = locations->InAt(0);
const Location right = locations->InAt(1);
@@ -1628,7 +1705,38 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond
const Register left_high = left.AsRegisterPairHigh<Register>();
const Register left_low = left.AsRegisterPairLow<Register>();
- int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(),
+ &cond,
+ &opposite);
+
+ // Comparisons against 0 are common enough to deserve special attention.
+ if (value == 0) {
+ switch (cond) {
+ case kCondNE:
+ // x > 0 iff x != 0 when the comparison is unsigned.
+ case kCondA:
+ ret = std::make_pair(NE, EQ);
+ FALLTHROUGH_INTENDED;
+ case kCondEQ:
+ // x <= 0 iff x == 0 when the comparison is unsigned.
+ case kCondBE:
+ __ orrs(IP, left_low, ShifterOperand(left_high));
+ return ret;
+ case kCondLT:
+ case kCondGE:
+ __ cmp(left_high, ShifterOperand(0));
+ return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ // Trivially true or false.
+ case kCondB:
+ ret = std::make_pair(NE, EQ);
+ FALLTHROUGH_INTENDED;
+ case kCondAE:
+ __ cmp(left_low, ShifterOperand(left_low));
+ return ret;
+ default:
+ break;
+ }
+ }
switch (cond) {
case kCondEQ:
@@ -1788,10 +1896,14 @@ static std::pair<Condition, Condition> GenerateTest(HCondition* condition,
static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
const LocationSummary* const locations = condition->GetLocations();
- const IfCondition c = condition->GetCondition();
if (locations->InAt(1).IsConstant()) {
- const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+ IfCondition c = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+ const int64_t value = AdjustConstantForCondition(
+ Int64FromConstant(locations->InAt(1).GetConstant()),
+ &c,
+ &opposite);
ShifterOperand so;
if (c < kCondLT || c > kCondGE) {
@@ -1799,9 +1911,11 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
// we check that the least significant half of the first input to be compared
// is in a low register (the other half is read outside an IT block), and
// the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used.
- if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
- !IsUint<8>(Low32Bits(value))) {
+ // encoding can be used; 0 is always handled, no matter what registers are
+ // used by the first input.
+ if (value != 0 &&
+ (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+ !IsUint<8>(Low32Bits(value)))) {
return false;
}
} else if (c == kCondLE || c == kCondGT) {
@@ -1828,6 +1942,329 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
return true;
}
+static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) {
+ DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
+
+ const Register out = cond->GetLocations()->Out().AsRegister<Register>();
+ const auto condition = GenerateTest(cond, false, codegen);
+
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+ if (ArmAssembler::IsLowRegister(out)) {
+ __ it(condition.first);
+ __ mov(out, ShifterOperand(1), condition.first);
+ } else {
+ Label done_label;
+ Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+
+ __ b(final_label, condition.second);
+ __ LoadImmediate(out, 1);
+
+ if (done_label.IsLinked()) {
+ __ Bind(&done_label);
+ }
+ }
+}
+
+static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) {
+ DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = cond->GetLocations();
+ IfCondition condition = cond->GetCondition();
+ const Register out = locations->Out().AsRegister<Register>();
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+ Register left_high = left.AsRegisterPairHigh<Register>();
+ Register left_low = left.AsRegisterPairLow<Register>();
+
+ if (right.IsConstant()) {
+ IfCondition opposite = cond->GetOppositeCondition();
+ const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+ &condition,
+ &opposite);
+ int32_t value_high = -High32Bits(value);
+ int32_t value_low = -Low32Bits(value);
+
+ // The output uses Location::kNoOutputOverlap.
+ if (out == left_high) {
+ std::swap(left_low, left_high);
+ std::swap(value_low, value_high);
+ }
+
+ __ AddConstant(out, left_low, value_low);
+ __ AddConstant(IP, left_high, value_high);
+ } else {
+ DCHECK(right.IsRegisterPair());
+ __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>()));
+ __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>()));
+ }
+
+ // Need to check after calling AdjustConstantForCondition().
+ DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
+
+ if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
+ __ orrs(out, out, ShifterOperand(IP));
+ __ it(NE);
+ __ mov(out, ShifterOperand(1), NE);
+ } else {
+ __ orr(out, out, ShifterOperand(IP));
+ codegen->GenerateConditionWithZero(condition, out, out, IP);
+ }
+}
+
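GenerateEqualLong() decides 64-bit (in)equality without branches: it subtracts the two halves (or adds the negated constant halves) and ORs the results, so the OR is zero exactly when both halves match; the register swap when the output aliases the high input is what makes the kNoOutputOverlap constraint safe. A compact check of the underlying identity:

    #include <cassert>
    #include <cstdint>

    // (a == b) iff ((a_low - b_low) | (a_high - b_high)) == 0, computed in 32-bit halves.
    bool EqualLong(uint64_t a, uint64_t b) {
      uint32_t low = static_cast<uint32_t>(a) - static_cast<uint32_t>(b);
      uint32_t high = static_cast<uint32_t>(a >> 32) - static_cast<uint32_t>(b >> 32);
      return (low | high) == 0u;
    }

    int main() {
      assert(EqualLong(0x0000000100000000ull, 0x0000000100000000ull));
      assert(!EqualLong(0x0000000100000000ull, 0x0000000000000000ull));  // Differ only in the high half.
      assert(!EqualLong(0x0000000000000001ull, 0x0000000000000000ull));  // Differ only in the low half.
      return 0;
    }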
+static void GenerateLongComparesAndJumps(HCondition* cond,
+ Label* true_label,
+ Label* false_label,
+ CodeGeneratorARM* codegen) {
+ LocationSummary* locations = cond->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+ IfCondition if_cond = cond->GetCondition();
+
+ Register left_high = left.AsRegisterPairHigh<Register>();
+ Register left_low = left.AsRegisterPairLow<Register>();
+ IfCondition true_high_cond = if_cond;
+ IfCondition false_high_cond = cond->GetOppositeCondition();
+ Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
+
+ // Set the conditions for the test, remembering that == needs to be
+ // decided using the low words.
+ switch (if_cond) {
+ case kCondEQ:
+ case kCondNE:
+ // Nothing to do.
+ break;
+ case kCondLT:
+ false_high_cond = kCondGT;
+ break;
+ case kCondLE:
+ true_high_cond = kCondLT;
+ break;
+ case kCondGT:
+ false_high_cond = kCondLT;
+ break;
+ case kCondGE:
+ true_high_cond = kCondGT;
+ break;
+ case kCondB:
+ false_high_cond = kCondA;
+ break;
+ case kCondBE:
+ true_high_cond = kCondB;
+ break;
+ case kCondA:
+ false_high_cond = kCondB;
+ break;
+ case kCondAE:
+ true_high_cond = kCondA;
+ break;
+ }
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ int32_t val_low = Low32Bits(value);
+ int32_t val_high = High32Bits(value);
+
+ __ CmpConstant(left_high, val_high);
+ if (if_cond == kCondNE) {
+ __ b(true_label, ARMCondition(true_high_cond));
+ } else if (if_cond == kCondEQ) {
+ __ b(false_label, ARMCondition(false_high_cond));
+ } else {
+ __ b(true_label, ARMCondition(true_high_cond));
+ __ b(false_label, ARMCondition(false_high_cond));
+ }
+ // Must be equal high, so compare the lows.
+ __ CmpConstant(left_low, val_low);
+ } else {
+ Register right_high = right.AsRegisterPairHigh<Register>();
+ Register right_low = right.AsRegisterPairLow<Register>();
+
+ __ cmp(left_high, ShifterOperand(right_high));
+ if (if_cond == kCondNE) {
+ __ b(true_label, ARMCondition(true_high_cond));
+ } else if (if_cond == kCondEQ) {
+ __ b(false_label, ARMCondition(false_high_cond));
+ } else {
+ __ b(true_label, ARMCondition(true_high_cond));
+ __ b(false_label, ARMCondition(false_high_cond));
+ }
+ // Must be equal high, so compare the lows.
+ __ cmp(left_low, ShifterOperand(right_low));
+ }
+ // The last comparison might be unsigned.
+ // TODO: optimize cases where this is always true/false
+ __ b(true_label, final_condition);
+}
+
+static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) {
+ DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = cond->GetLocations();
+ IfCondition condition = cond->GetCondition();
+ const Register out = locations->Out().AsRegister<Register>();
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+
+ if (right.IsConstant()) {
+ IfCondition opposite = cond->GetOppositeCondition();
+
+ // Comparisons against 0 are common enough to deserve special attention.
+ if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+ &condition,
+ &opposite) == 0) {
+ switch (condition) {
+ case kCondNE:
+ case kCondA:
+ if (ArmAssembler::IsLowRegister(out)) {
+ // We only care if both input registers are 0 or not.
+ __ orrs(out,
+ left.AsRegisterPairLow<Register>(),
+ ShifterOperand(left.AsRegisterPairHigh<Register>()));
+ __ it(NE);
+ __ mov(out, ShifterOperand(1), NE);
+ return;
+ }
+
+ FALLTHROUGH_INTENDED;
+ case kCondEQ:
+ case kCondBE:
+ // We only care if both input registers are 0 or not.
+ __ orr(out,
+ left.AsRegisterPairLow<Register>(),
+ ShifterOperand(left.AsRegisterPairHigh<Register>()));
+ codegen->GenerateConditionWithZero(condition, out, out);
+ return;
+ case kCondLT:
+ case kCondGE:
+ // We only care about the sign bit.
+ FALLTHROUGH_INTENDED;
+ case kCondAE:
+ case kCondB:
+ codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>());
+ return;
+ case kCondLE:
+ case kCondGT:
+ default:
+ break;
+ }
+ }
+ }
+
+ if ((condition == kCondEQ || condition == kCondNE) &&
+ // If `out` is a low register, then the GenerateConditionGeneric()
+ // function generates a shorter code sequence that is still branchless.
+ (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+ GenerateEqualLong(cond, codegen);
+ return;
+ }
+
+ if (CanGenerateTest(cond, codegen->GetAssembler())) {
+ GenerateConditionGeneric(cond, codegen);
+ return;
+ }
+
+ // Convert the jumps into the result.
+ Label done_label;
+ Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+ Label true_label, false_label;
+
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen);
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ mov(out, ShifterOperand(0));
+ __ b(final_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ mov(out, ShifterOperand(1));
+
+ if (done_label.IsLinked()) {
+ __ Bind(&done_label);
+ }
+}
+
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) {
+ const Primitive::Type type = cond->GetLeft()->GetType();
+
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ if (type == Primitive::kPrimLong) {
+ GenerateConditionLong(cond, codegen);
+ return;
+ }
+
+ const LocationSummary* const locations = cond->GetLocations();
+ IfCondition condition = cond->GetCondition();
+ Register in = locations->InAt(0).AsRegister<Register>();
+ const Register out = locations->Out().AsRegister<Register>();
+ const Location right = cond->GetLocations()->InAt(1);
+ int64_t value;
+
+ if (right.IsConstant()) {
+ IfCondition opposite = cond->GetOppositeCondition();
+
+ value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+ &condition,
+ &opposite);
+
+ // Comparisons against 0 are common enough to deserve special attention.
+ if (value == 0) {
+ switch (condition) {
+ case kCondNE:
+ case kCondA:
+ if (ArmAssembler::IsLowRegister(out) && out == in) {
+ __ cmp(out, ShifterOperand(0));
+ __ it(NE);
+ __ mov(out, ShifterOperand(1), NE);
+ return;
+ }
+
+ FALLTHROUGH_INTENDED;
+ case kCondEQ:
+ case kCondBE:
+ case kCondLT:
+ case kCondGE:
+ case kCondAE:
+ case kCondB:
+ codegen->GenerateConditionWithZero(condition, out, in);
+ return;
+ case kCondLE:
+ case kCondGT:
+ default:
+ break;
+ }
+ }
+ }
+
+ if (condition == kCondEQ || condition == kCondNE) {
+ ShifterOperand operand;
+
+ if (right.IsConstant()) {
+ operand = ShifterOperand(value);
+ } else if (out == right.AsRegister<Register>()) {
+ // Avoid 32-bit instructions if possible.
+ operand = ShifterOperand(in);
+ in = right.AsRegister<Register>();
+ } else {
+ operand = ShifterOperand(right.AsRegister<Register>());
+ }
+
+ if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
+ __ subs(out, in, operand);
+ __ it(NE);
+ __ mov(out, ShifterOperand(1), NE);
+ } else {
+ __ sub(out, in, operand);
+ codegen->GenerateConditionWithZero(condition, out, out);
+ }
+
+ return;
+ }
+
+ GenerateConditionGeneric(cond, codegen);
+}
+
static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
const Primitive::Type type = constant->GetType();
bool ret = false;
@@ -1960,13 +2397,11 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
uint32_literals_(std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_string_patches_(StringReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_type_patches_(TypeReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2433,89 +2868,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
- Label* true_label,
- Label* false_label) {
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- IfCondition if_cond = cond->GetCondition();
-
- Register left_high = left.AsRegisterPairHigh<Register>();
- Register left_low = left.AsRegisterPairLow<Register>();
- IfCondition true_high_cond = if_cond;
- IfCondition false_high_cond = cond->GetOppositeCondition();
- Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
-
- // Set the conditions for the test, remembering that == needs to be
- // decided using the low words.
- switch (if_cond) {
- case kCondEQ:
- case kCondNE:
- // Nothing to do.
- break;
- case kCondLT:
- false_high_cond = kCondGT;
- break;
- case kCondLE:
- true_high_cond = kCondLT;
- break;
- case kCondGT:
- false_high_cond = kCondLT;
- break;
- case kCondGE:
- true_high_cond = kCondGT;
- break;
- case kCondB:
- false_high_cond = kCondA;
- break;
- case kCondBE:
- true_high_cond = kCondB;
- break;
- case kCondA:
- false_high_cond = kCondB;
- break;
- case kCondAE:
- true_high_cond = kCondA;
- break;
- }
- if (right.IsConstant()) {
- int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- int32_t val_low = Low32Bits(value);
- int32_t val_high = High32Bits(value);
-
- __ CmpConstant(left_high, val_high);
- if (if_cond == kCondNE) {
- __ b(true_label, ARMCondition(true_high_cond));
- } else if (if_cond == kCondEQ) {
- __ b(false_label, ARMCondition(false_high_cond));
- } else {
- __ b(true_label, ARMCondition(true_high_cond));
- __ b(false_label, ARMCondition(false_high_cond));
- }
- // Must be equal high, so compare the lows.
- __ CmpConstant(left_low, val_low);
- } else {
- Register right_high = right.AsRegisterPairHigh<Register>();
- Register right_low = right.AsRegisterPairLow<Register>();
-
- __ cmp(left_high, ShifterOperand(right_high));
- if (if_cond == kCondNE) {
- __ b(true_label, ARMCondition(true_high_cond));
- } else if (if_cond == kCondEQ) {
- __ b(false_label, ARMCondition(false_high_cond));
- } else {
- __ b(true_label, ARMCondition(true_high_cond));
- __ b(false_label, ARMCondition(false_high_cond));
- }
- // Must be equal high, so compare the lows.
- __ cmp(left_low, ShifterOperand(right_low));
- }
- // The last comparison might be unsigned.
- // TODO: optimize cases where this is always true/false
- __ b(true_label, final_condition);
-}
-
void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target_in,
Label* false_target_in) {
@@ -2557,7 +2909,7 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi
Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
- GenerateLongComparesAndJumps(condition, true_target, false_target);
+ GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_);
if (false_target != &fallthrough_target) {
__ b(false_target);
@@ -2872,6 +3224,80 @@ void CodeGeneratorARM::GenerateNop() {
__ nop();
}
+// `temp` is an extra temporary register that is used for some conditions;
+// callers may not specify it, in which case the method will use a scratch
+// register instead.
+void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition,
+ Register out,
+ Register in,
+ Register temp) {
+ switch (condition) {
+ case kCondEQ:
+ // x <= 0 iff x == 0 when the comparison is unsigned.
+ case kCondBE:
+ if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) {
+ temp = out;
+ }
+
+ // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
+ // different as well.
+ if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) {
+ // temp = - in; only 0 sets the carry flag.
+ __ rsbs(temp, in, ShifterOperand(0));
+
+ if (out == in) {
+ std::swap(in, temp);
+ }
+
+ // out = - in + in + carry = carry
+ __ adc(out, temp, ShifterOperand(in));
+ } else {
+ // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
+ __ clz(out, in);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
+ }
+
+ break;
+ case kCondNE:
+ // x > 0 iff x != 0 when the comparison is unsigned.
+ case kCondA:
+ if (out == in) {
+ if (temp == kNoRegister || in == temp) {
+ temp = IP;
+ }
+ } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) {
+ temp = out;
+ }
+
+ // temp = in - 1; only 0 does not set the carry flag.
+ __ subs(temp, in, ShifterOperand(1));
+ // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
+ __ sbc(out, in, ShifterOperand(temp));
+ break;
+ case kCondGE:
+ __ mvn(out, ShifterOperand(in));
+ in = out;
+ FALLTHROUGH_INTENDED;
+ case kCondLT:
+ // We only care about the sign bit.
+ __ Lsr(out, in, 31);
+ break;
+ case kCondAE:
+ // Trivially true.
+ __ mov(out, ShifterOperand(1));
+ break;
+ case kCondB:
+ // Trivially false.
+ __ mov(out, ShifterOperand(0));
+ break;
+ default:
+ LOG(FATAL) << "Unexpected condition " << condition;
+ UNREACHABLE();
+ }
+}
+
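GenerateConditionWithZero() turns a comparison against zero into a 0/1 value with flag and bit tricks: RSBS/ADC leaves the carry, which is set only for a zero input; SUBS #1/SBC leaves the carry that is set only for a non-zero input; CLZ followed by a logical shift right by 5 maps "32 leading zeros" to 1; and less-than-zero is just the sign bit. The following C++ mirrors the arithmetic of those identities (not the emitted instructions) as a self-check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t in : {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu}) {
        // kCondEQ / kCondBE: rsbs temp, in, #0 sets the carry only for in == 0;
        // adc out, temp, in then computes (-in) + in + carry == carry.
        uint32_t temp = 0u - in;
        uint32_t carry = (in == 0u) ? 1u : 0u;  // ARM carry = "no borrow" for the subtraction.
        assert(static_cast<uint32_t>(temp + in + carry) == (in == 0u ? 1u : 0u));

        // kCondEQ, CLZ variant: clz(in) >> 5 is 1 exactly when in == 0 (32 leading zeros).
        uint32_t clz = 32u;
        for (uint32_t v = in; v != 0u; v >>= 1) --clz;
        assert((clz >> 5) == (in == 0u ? 1u : 0u));

        // kCondNE / kCondA: subs temp, in, #1 clears the carry only for in == 0;
        // sbc out, in, temp then computes in + ~temp + carry == carry.
        uint32_t temp2 = in - 1u;
        uint32_t carry2 = (in != 0u) ? 1u : 0u;
        assert(static_cast<uint32_t>(in + ~temp2 + carry2) == (in != 0u ? 1u : 0u));

        // kCondLT: the result is just the sign bit.
        assert((in >> 31) == (static_cast<int32_t>(in) < 0 ? 1u : 0u));
      }
      return 0;
    }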
void LocationsBuilderARM::HandleCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -2908,48 +3334,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
return;
}
- const Register out = cond->GetLocations()->Out().AsRegister<Register>();
-
- if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
- const auto condition = GenerateTest(cond, false, codegen_);
+ const Primitive::Type type = cond->GetLeft()->GetType();
- __ it(condition.first);
- __ mov(out, ShifterOperand(1), condition.first);
- __ it(condition.second);
- __ mov(out, ShifterOperand(0), condition.second);
+ if (Primitive::IsFloatingPointType(type)) {
+ GenerateConditionGeneric(cond, codegen_);
return;
}
- // Convert the jumps into the result.
- Label done_label;
- Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
- if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
- Label true_label, false_label;
+ const IfCondition condition = cond->GetCondition();
- GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+ // A condition with only one boolean input, or with two boolean inputs that are not an
+ // equality or inequality, results from transformations done by the instruction simplifier
+ // and is handled as a regular condition with integral inputs.
+ if (type == Primitive::kPrimBoolean &&
+ cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+ (condition == kCondEQ || condition == kCondNE)) {
+ const LocationSummary* const locations = cond->GetLocations();
+ Register left = locations->InAt(0).AsRegister<Register>();
+ const Register out = locations->Out().AsRegister<Register>();
+ const Location right_loc = locations->InAt(1);
- // False case: result = 0.
- __ Bind(&false_label);
- __ LoadImmediate(out, 0);
- __ b(final_label);
+ // The constant case is handled by the instruction simplifier.
+ DCHECK(!right_loc.IsConstant());
- // True case: result = 1.
- __ Bind(&true_label);
- __ LoadImmediate(out, 1);
- } else {
- DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+ Register right = right_loc.AsRegister<Register>();
- const auto condition = GenerateTest(cond, false, codegen_);
+ // Avoid 32-bit instructions if possible.
+ if (out == right) {
+ std::swap(left, right);
+ }
- __ mov(out, ShifterOperand(0), AL, kCcKeep);
- __ b(final_label, condition.second);
- __ LoadImmediate(out, 1);
- }
+ __ eor(out, left, ShifterOperand(right));
- if (done_label.IsLinked()) {
- __ Bind(&done_label);
+ if (condition == kCondEQ) {
+ __ eor(out, out, ShifterOperand(1));
+ }
+
+ return;
}
+
+ GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
}
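For a boolean equality or inequality with two boolean inputs, the rewritten HandleCondition() uses EOR: out = left ^ right already is the result for !=, and a further ^ 1 flips it into ==. A two-line check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t left : {0u, 1u}) {
        for (uint32_t right : {0u, 1u}) {
          assert((left ^ right) == (left != right ? 1u : 0u));         // kCondNE
          assert(((left ^ right) ^ 1u) == (left == right ? 1u : 0u));  // kCondEQ
        }
      }
      return 0;
    }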
void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -3082,6 +3508,15 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant
// Will be generated at use site.
}
+void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -5287,7 +5722,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5753,11 +6199,35 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
- // Also need for String compression feature.
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
- || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Also need a temporary for the String compression feature.
locations->AddTemp(Location::RequiresRegister());
}
}
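When the array index is a compile-time constant, the array load is treated as a field load: the element offset is folded into the data offset as data_offset + (index << ComponentSizeShift(kPrimNot)), so no index register is needed and, below kReferenceLoadMinFarOffset, no extra temporary either. A small illustration of that folding (the 12-byte data offset and 4-byte references are assumptions of this sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Assumed layout: array element data starts at offset 12 for 4-byte elements,
      // and references are 4 bytes in this sketch.
      constexpr uint32_t kObjectArrayDataOffset = 12u;
      constexpr uint32_t kReferenceSizeShift = 2u;  // log2(4)

      uint32_t index = 5u;
      uint32_t folded_offset = kObjectArrayDataOffset + (index << kReferenceSizeShift);
      assert(folded_offset == 32u);

      // The folded offset is small, so the field-load path needs no extra temp register.
      constexpr uint32_t kReferenceLoadMinFarOffset = 4u * 1024u;
      assert(folded_offset < kReferenceLoadMinFarOffset);
      return 0;
    }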
@@ -5869,8 +6339,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
} else {
Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
@@ -6275,6 +6757,15 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress*
}
}
+void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
@@ -6645,21 +7136,15 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- break;
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kRuntimeCall:
break;
}
return desired_class_load_kind;
@@ -6667,7 +7152,7 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
@@ -6707,13 +7192,20 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
// For non-Baker read barrier we have a temp-clobbering call.
}
}
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -6740,13 +7232,6 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
- cls->GetTypeIndex()));
- break;
- }
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -6792,7 +7277,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -6846,21 +7331,15 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadString::LoadKind::kBootImageAddress:
- break;
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kRuntimeCall:
break;
}
return desired_string_load_kind;
@@ -6870,7 +7349,7 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
locations->SetOut(Location::RegisterLocation(R0));
} else {
locations->SetOut(Location::RequiresRegister());
@@ -6886,6 +7365,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -6902,12 +7384,6 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S
HLoadString::LoadKind load_kind = load->GetLoadKind();
switch (load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
- load->GetStringIndex()));
- return; // No dex cache slow path.
- }
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARM::PcRelativePatchInfo* labels =
@@ -6960,7 +7436,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S
}
// TODO: Consider re-adding the compiler code to do string dex cache lookup again.
- DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+ DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
InvokeRuntimeCallingConvention calling_convention;
DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
@@ -7056,6 +7532,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7929,48 +8408,96 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
+ Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ EmitPlaceholderBne(codegen_, bne_label);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
+ } else {
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
+
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // The entrypoint is null when the GC is not marking, which avoids one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -7988,6 +8515,16 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
}
}
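
The fast path above relies on a single invariant: the per-thread pReadBarrierMark entrypoint slot is null exactly when the GC is not marking, so one null check replaces a load of GetIsGcMarking(). A minimal standalone C++ sketch of that decision, using hypothetical names (MarkFn, LoadGcRoot) rather than ART APIs:

struct Object;                          // Hypothetical stand-in, not ART's mirror::Object.
using MarkFn = Object* (*)(Object*);    // Models a pReadBarrierMark entrypoint.

// Models the GC root load emitted above: `entrypoint` is the value loaded from the
// thread-local slot and is null while the GC is not marking.
Object* LoadGcRoot(Object** root_slot, MarkFn entrypoint) {
  Object* root = *root_slot;            // Original reference load.
  if (entrypoint != nullptr) {          // <=> Thread::Current()->GetIsGcMarking()
    root = entrypoint(root);            // Slow path: ReadBarrier::Mark(root).
  }
  return root;
}

int main() {
  Object* root = nullptr;
  // With a null entrypoint (GC not marking) the load degenerates to a plain load.
  return LoadGcRoot(&root, /* entrypoint */ nullptr) == nullptr ? 0 : 1;
}
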
+void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
+}
+
void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -7997,6 +8534,76 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register ref_reg = ref.AsRegister<Register>();
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+ Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = temp.AsRegister<Register>();
+ DCHECK_NE(base, kBakerCcEntrypointRegister);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use a narrow LDR only for small offsets. Generating a narrow-encoding LDR for large
+ // offsets where `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size once the generated thunks are taken into account.
+ DCHECK(!narrow);
+ }
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ EmitPlaceholderBne(this, bne_label);
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
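
The far-offset handling above (base adjustment plus a small LDR immediate) is plain bit arithmetic. A standalone sketch, assuming only that the threshold is a power of two; the 16 KB value is borrowed from the arm64 constant visible elsewhere in this patch:

#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr uint32_t kReferenceLoadMinFarOffset = 16 * 1024;  // Assumed power-of-two threshold.

// Mirrors the split in GenerateFieldLoadWithBakerReadBarrier above:
//   AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
//   offset &= (kReferenceLoadMinFarOffset - 1u);
void SplitFarOffset(uint32_t offset, uint32_t* base_adjustment, uint32_t* ldr_offset) {
  static_assert((kReferenceLoadMinFarOffset & (kReferenceLoadMinFarOffset - 1u)) == 0u,
                "Expecting a power of 2.");
  *base_adjustment = offset & ~(kReferenceLoadMinFarOffset - 1u);
  *ldr_offset = offset & (kReferenceLoadMinFarOffset - 1u);
}

int main() {
  uint32_t base_adjustment = 0u;
  uint32_t ldr_offset = 0u;
  SplitFarOffset(0x4a10u, &base_adjustment, &ldr_offset);  // 18960 >= 16 KB.
  assert(base_adjustment + ldr_offset == 0x4a10u);
  std::printf("base += 0x%x, ldr offset = 0x%x\n", base_adjustment, ldr_offset);
  return 0;
}
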
@@ -8017,9 +8624,67 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = index.AsRegister<Register>();
+ Register ref_reg = ref.AsRegister<Register>();
+ Register data_reg = temp.AsRegister<Register>();
+ DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+ __ AddConstant(data_reg, obj, data_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ EmitPlaceholderBne(this, bne_label);
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
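
For reference, the address computed by the emitted LDR is just obj + data_offset plus the index scaled by the 4-byte size of a compressed heap reference (the TIMES_4 / LSL #2 above). A tiny standalone sketch with hypothetical numbers:

#include <cstdint>
#include <cstdio>

// data = obj + data_offset        (AddConstant(data_reg, obj, data_offset))
// ref  = *(data + (index << 2))   (ldr ref_reg, [data_reg, index_reg, LSL #2])
uint32_t ArrayElementAddress(uint32_t obj, uint32_t data_offset, uint32_t index) {
  uint32_t data = obj + data_offset;
  return data + (index << 2);
}

int main() {
  std::printf("0x%x\n", ArrayElementAddress(/* obj */ 0x1000u, /* data_offset */ 12u, /* index */ 5u));
  return 0;
}
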
@@ -8031,9 +8696,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field,
- Register* temp2) {
+ bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -8044,6 +8707,73 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
+
+ Register temp_reg = temp.AsRegister<Register>();
+
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Location temp2 = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp2);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset);
+ // The entrypoint is null when the GC is not marking, which avoids one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel());
+ // Fast path: the GC is not marking: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ __ Bind(slow_path->GetExitLabel());
+}
+
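
The slow-path check described in the comment above (only gray holders need their freshly loaded reference marked) can be modeled in standalone C++. The lock-word layout and gray encoding below are hypothetical placeholders, not ART's, and the load-load ordering ("lfence" in the pseudocode) is omitted:

struct Object {                      // Hypothetical stand-in, not ART's mirror::Object.
  unsigned monitor;                  // Models the lock word holding the read barrier state.
  Object* field;
};
using MarkFn = Object* (*)(Object*);

constexpr unsigned kGrayState = 1u;  // Hypothetical encoding of ReadBarrier::GrayState().

Object* LoadFieldWithBakerReadBarrier(Object* obj, MarkFn entrypoint) {
  if (entrypoint == nullptr) {
    return obj->field;                        // Fast path: the GC is not marking.
  }
  unsigned rb_state = obj->monitor & 1u;      // Lockword(obj->monitor_).ReadBarrierState()
  Object* ref = obj->field;                   // Original reference load.
  if (rb_state == kGrayState) {
    ref = entrypoint(ref);                    // ReadBarrier::Mark(ref).
  }
  return ref;
}

int main() {
  Object holder = { /* monitor */ 0u, /* field */ nullptr };
  return LoadFieldWithBakerReadBarrier(&holder, nullptr) == nullptr ? 0 : 1;
}
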
+void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ Register temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to update the reference
+ // field within `obj`. Then, in the slow path, check the gray bit
+ // in the lock word of the reference's holder (`obj`) to decide
+ // whether to mark `ref` and update the field or not.
+ //
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
// corresponding to register `ref`. If `temp3` is null, it means
// that `GetIsGcMarking()` is false, and vice versa.
@@ -8056,52 +8786,30 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// HeapReference<mirror::Object> ref = *src; // Original reference load.
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
+ // old_ref = ref;
// ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
// }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
Register temp_reg = temp.AsRegister<Register>();
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp3`.
+ // Slow path updating the object reference at address `obj +
+ // field_offset` when the GC is marking. The entrypoint will already
+ // be loaded in `temp3`.
Location temp3 = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
- // supports address of the form `obj + field_offset`, where `obj`
- // is a register and `field_offset` is a register pair (of which
- // only the lower half is used). Thus `offset` and `scale_factor`
- // above are expected to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- temp_reg,
- *temp2,
- /* entrypoint */ temp3);
- } else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp3);
- }
+ SlowPathCodeARM* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ ScaleFactor::TIMES_1,
+ needs_null_check,
+ temp_reg,
+ temp2,
+ /* entrypoint */ temp3);
AddSlowPath(slow_path);
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8113,8 +8821,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// The entrypoint is null when the GC is not marking, which avoids one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ // Fast path: the GC is not marking: nothing to do (the field is
+ // up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
}
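
A standalone sketch of the field-updating variant introduced above, with hypothetical types and gray encoding. The CAS mirrors the compareAndSwapObject step in the pseudocode, so a racing update of the field is not silently overwritten:

#include <atomic>

struct Object {                          // Hypothetical stand-in, not ART's mirror::Object.
  unsigned monitor;                      // Models the lock word holding the read barrier state.
  std::atomic<Object*> field;
};
using MarkFn = Object* (*)(Object*);

void UpdateFieldWithBakerReadBarrier(Object* obj, MarkFn entrypoint) {
  if (entrypoint == nullptr) {
    return;                              // Fast path: GC not marking, the field is up to date.
  }
  bool is_gray = (obj->monitor & 1u) == 1u;                         // Hypothetical gray check.
  if (is_gray) {
    Object* old_ref = obj->field.load(std::memory_order_relaxed);   // Original reference load.
    Object* ref = entrypoint(old_ref);                              // ReadBarrier::Mark(ref).
    // compareAndSwapObject(obj, field_offset, old_ref, ref);
    obj->field.compare_exchange_strong(old_ref, ref, std::memory_order_relaxed);
  }
}

int main() {
  Object holder{0u, {nullptr}};
  UpdateFieldWithBakerReadBarrier(&holder, nullptr);   // GC not marking: nothing to do.
  return 0;
}
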
@@ -8245,7 +8953,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
// save one load. However, since this is just an intrinsic slow path we prefer this
// simple and more robust approach rather than trying to determine if that's the case.
SlowPathCode* slow_path = GetCurrentSlowPath();
- if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+ DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
__ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
return temp;
@@ -8253,8 +8962,7 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
return location.AsRegister<Register>();
}
-Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
+void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -8267,6 +8975,18 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ Register temp_reg = temp.AsRegister<Register>();
+ PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+ __ BindTrackedLabel(&labels->movw_label);
+ __ movw(temp_reg, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->movt_label);
+ __ movt(temp_reg, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->add_pc_label);
+ __ add(temp_reg, temp_reg, ShifterOperand(PC));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
break;
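
The MOVW/MOVT/ADD sequence in the kBootImageLinkTimePcRelative case above leaves placeholder immediates for the linker. The value patched in is the displacement from the PC as read by the final ADD (in Thumb state, the ADD's address plus 4) to the target method. A minimal sketch of that arithmetic with hypothetical addresses:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical addresses purely for illustration.
  const uint32_t target = 0x70123456u;        // Address the method patch should resolve to.
  const uint32_t add_pc_insn = 0x70001000u;   // Address of the `add temp, temp, pc`.
  const uint32_t pc_value = add_pc_insn + 4u; // Thumb: PC reads as the instruction address + 4.
  const uint32_t displacement = target - pc_value;

  const uint16_t movw_imm = static_cast<uint16_t>(displacement & 0xffffu);  // movw placeholder.
  const uint16_t movt_imm = static_cast<uint16_t>(displacement >> 16);      // movt placeholder.

  const uint32_t temp = (static_cast<uint32_t>(movt_imm) << 16) | movw_imm; // movw + movt
  assert(temp + pc_value == target);                                        // add temp, temp, pc
  std::printf("movw #0x%x, movt #0x%x\n",
              static_cast<unsigned>(movw_imm), static_cast<unsigned>(movt_imm));
  return 0;
}
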
@@ -8303,11 +9023,6 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
break;
}
}
- return callee_method;
-}
-
-void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -8359,9 +9074,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
__ blx(LR);
}
-CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
- const DexFile& dex_file, dex::StringIndex string_index) {
- return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch(
+ MethodReference target_method) {
+ return NewPcRelativePatch(*target_method.dex_file,
+ target_method.dex_method_index,
+ &pc_relative_method_patches_);
}
CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
@@ -8374,6 +9091,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewTypeBssEntryPatch(
return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
}
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
+ const DexFile& dex_file, dex::StringIndex string_index) {
+ return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
const DexFile& dex_file, uint32_t element_offset) {
return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -8385,18 +9107,9 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch(
return &patches->back();
}
-Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index) {
- return boot_image_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
- dex::TypeIndex type_index) {
- return boot_image_type_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
}
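
NewBakerReadBarrierPatch above records one opaque custom_data word per BNE site; the real bit layout is owned by linker::Thumb2RelativePatcher and is not part of this file. Purely to illustrate the idea of packing a thunk kind plus register numbers into a single word, here is a hypothetical encoding (not ART's):

#include <cassert>
#include <cstdint>

enum class ThunkKind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };  // Hypothetical.

// Hypothetical packing, lowest bits first: narrow(1), holder_reg(4), base_reg(4), kind(rest).
constexpr uint32_t EncodeCustomData(ThunkKind kind,
                                    uint32_t base_reg,
                                    uint32_t holder_reg,
                                    bool narrow) {
  return (static_cast<uint32_t>(kind) << 9) | (base_reg << 5) | (holder_reg << 1) |
         (narrow ? 1u : 0u);
}

int main() {
  constexpr uint32_t data =
      EncodeCustomData(ThunkKind::kGcRoot, /* base_reg */ 3u, /* holder_reg */ 0u, /* narrow */ true);
  static_assert(((data >> 5) & 0xfu) == 3u, "base register round-trips");
  assert(data == 0x461u);
  return 0;
}
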
Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
@@ -8447,43 +9160,32 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
DCHECK(linker_patches->empty());
size_t size =
/* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
- boot_image_string_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
- boot_image_type_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- for (const auto& entry : boot_image_string_patches_) {
- const StringReference& target_string = entry.first;
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = literal->GetLabel()->Position();
- linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
- target_string.dex_file,
- target_string.string_index.index_));
- }
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
linker_patches);
- } else {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
linker_patches);
+ } else {
+ DCHECK(pc_relative_method_patches_.empty());
+ DCHECK(pc_relative_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
- for (const auto& entry : boot_image_type_patches_) {
- const TypeReference& target_type = entry.first;
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = literal->GetLabel()->Position();
- linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
- target_type.dex_file,
- target_type.type_index.index_));
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(),
+ info.custom_data));
}
DCHECK_EQ(size, linker_patches->size());
}
@@ -8494,13 +9196,6 @@ Literal* CodeGeneratorARM::DeduplicateUint32Literal(uint32_t value, Uint32ToLite
[this, value]() { return __ NewLiteral<uint32_t>(value); });
}
-Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
- MethodToLiteralMap* map) {
- return map->GetOrCreate(
- target_method,
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -8716,14 +9411,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 86f2f21df7..5f37d3bff1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -24,8 +24,8 @@
#include "nodes.h"
#include "string_reference.h"
#include "parallel_move_resolver.h"
+#include "type_reference.h"
#include "utils/arm/assembler_thumb2.h"
-#include "utils/type_reference.h"
namespace art {
namespace arm {
@@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
- void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -456,7 +455,6 @@ class CodeGeneratorARM : public CodeGenerator {
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
HInvokeStaticOrDirect* invoke) OVERRIDE;
- Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -482,15 +480,18 @@ class CodeGeneratorARM : public CodeGenerator {
Label add_pc_label;
};
- PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
- dex::StringIndex string_index);
+ PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+ PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
- Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index);
- Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
+
+ // Add a new Baker read barrier patch and return the label to be bound
+ // before the BNE instruction.
+ Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
@@ -503,6 +504,10 @@ class CodeGeneratorARM : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Maybe add the reserved entrypoint register as a temporary for field loads. This temp
+ // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -526,11 +531,6 @@ class CodeGeneratorARM : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`). This operation
- // requires an extra temporary register, which must be provided as a
- // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -538,9 +538,27 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field = false,
- Register* temp2 = nullptr);
+ bool needs_null_check);
+
+ // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is used only as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ Register temp2);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -604,11 +622,18 @@ class CodeGeneratorARM : public CodeGenerator {
void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ // `temp` is an extra temporary register that is used for some conditions;
+ // callers may omit it, in which case the method will use a scratch
+ // register instead.
+ void GenerateConditionWithZero(IfCondition condition,
+ Register out,
+ Register in,
+ Register temp = kNoRegister);
+
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
- using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
Literal*,
StringReferenceValueComparator>;
@@ -616,8 +641,14 @@ class CodeGeneratorARM : public CodeGenerator {
Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ Label label;
+ uint32_t custom_data;
+ };
+
Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
- Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
ArenaDeque<PcRelativePatchInfo>* patches);
@@ -638,16 +669,16 @@ class CodeGeneratorARM : public CodeGenerator {
Uint32ToLiteralMap uint32_literals_;
// PC-relative patch info for each HArmDexCacheArraysBase.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
- StringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
- ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
- // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
- TypeToLiteralMap boot_image_type_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index da146d72cd..f2b312362f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -29,6 +29,7 @@
#include "linker/arm64/relative_patcher_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
+#include "lock_word.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
@@ -91,6 +92,7 @@ constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
// Some instructions have special requirements for a temporary, for example
@@ -855,7 +857,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo
// Baker's read barriers, we need to perform the load of
// mirror::Object::monitor_ *before* the original reference load.
// This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // The slow path (for Baker's algorithm) should look like:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -1006,6 +1008,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
__ Bind(GetEntryLabel());
+ // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+
// /* int32_t */ monitor = obj->monitor_
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
__ Ldr(temp_, HeapOperand(obj_, monitor_offset));
@@ -1436,13 +1450,10 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
uint64_literals_(std::less<uint64_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_string_patches_(StringReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_type_patches_(TypeReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1502,7 +1513,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
if (kind == Location::kRegister) {
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
- DCHECK(kind == Location::kFpuRegister);
+ DCHECK_EQ(kind, Location::kFpuRegister);
scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
: vixl_temps_.AcquireD());
@@ -1730,9 +1741,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}
-// Allocate a scratch register from the VIXL pool, querying first into
-// the floating-point register pool, and then the the core register
-// pool. This is essentially a reimplementation of
+// Allocate a scratch register from the VIXL pool, querying first
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
// using a different allocation strategy.
static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
@@ -1880,7 +1891,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
// ask for a scratch register of any type (core or FP).
//
// Also, we start by asking for a FP scratch register first, as the
- // demand of scratch core registers is higher. This is why we
+ // demand of scratch core registers is higher. This is why we
// use AcquireFPOrCoreCPURegisterOfSize instead of
// UseScratchRegisterScope::AcquireCPURegisterOfSize, which
// allocates core scratch registers first.
@@ -2648,6 +2659,38 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddres
Operand(InputOperandAt(instruction, 1)));
}
+void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ HIntConstant* shift = instruction->GetShift()->AsIntConstant();
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ // For byte case we don't need to shift the index variable so we can encode the data offset into
+ // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist
+ // data offset constant generation out of the loop and reduce the critical path length in the
+ // loop.
+ locations->SetInAt(1, shift->GetValue() == 0
+ ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+ : Location::RequiresRegister());
+ locations->SetInAt(2, Location::ConstantLocation(shift));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ Register index_reg = InputRegisterAt(instruction, 0);
+ uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
+ uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
+
+ if (shift == 0) {
+ __ Add(OutputRegister(instruction), index_reg, offset);
+ } else {
+ Register offset_reg = InputRegisterAt(instruction, 1);
+ __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
+ }
+}
+
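
A standalone sketch of the two forms generated above: with a zero shift the data offset folds into the ADD immediate, otherwise the offset register is combined with the scaled index (hypothetical values in main):

#include <cstdint>
#include <cstdio>

uint64_t IntermediateAddressIndex(uint64_t index, uint64_t offset, uint32_t shift) {
  if (shift == 0u) {
    return index + offset;               // Add(out, index_reg, offset)
  }
  return offset + (index << shift);      // Add(out, offset_reg, Operand(index_reg, LSL, shift))
}

int main() {
  // Hypothetical int[] access: data offset 16 bytes, index 5, element shift 2.
  std::printf("%llu\n",
              static_cast<unsigned long long>(IntermediateAddressIndex(5u, 16u, 2u)));
  return 0;
}
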
void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -2764,6 +2807,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
// Object ArrayGet with Baker's read barrier case.
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
// Array load with a constant index can be treated as a field load.
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -2774,12 +2818,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
obj.W(),
offset,
maybe_temp,
- /* needs_null_check */ true,
+ /* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
Register temp = WRegisterFrom(locations->GetTemp(0));
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
}
} else {
// General case.
@@ -4446,8 +4490,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic
return desired_dispatch_info;
}
-Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
+void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
// Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
@@ -4461,6 +4504,17 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ // Add ADRP with its PC-relative method patch.
+ vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+ EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
+ // Add ADD with its PC-relative method patch.
+ vixl::aarch64::Label* add_label =
+ NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label);
+ EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
+ break;
+ }
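
The ADRP/ADD pair above works at 4 KiB page granularity: ADRP materializes the page of the target relative to the page containing the ADRP, and the ADD supplies the low 12 bits. A minimal sketch of that arithmetic with hypothetical addresses:

#include <cassert>
#include <cstdint>

uint64_t AdrpAddTarget(uint64_t adrp_pc, uint64_t target) {
  const uint64_t current_page = adrp_pc & ~0xfffull;
  const int64_t page_delta =
      static_cast<int64_t>((target & ~0xfffull) - current_page);  // Patched into the ADRP.
  const uint64_t low12 = target & 0xfffull;                       // Patched into the ADD.
  return current_page + static_cast<uint64_t>(page_delta) + low12;
}

int main() {
  assert(AdrpAddTarget(0x70001234ull, 0x7fedcba9ull) == 0x7fedcba9ull);  // Hypothetical addresses.
  return 0;
}
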
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
// Load method address from literal pool.
__ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
@@ -4501,12 +4555,6 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati
break;
}
}
- return callee_method;
-}
-
-void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
- // All registers are assumed to be correctly set up.
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -4584,12 +4632,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i
codegen_->GenerateInvokePolymorphicCall(invoke);
}
-vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
- const DexFile& dex_file,
- dex::StringIndex string_index,
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
+ MethodReference target_method,
vixl::aarch64::Label* adrp_label) {
- return
- NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
+ return NewPcRelativePatch(*target_method.dex_file,
+ target_method.dex_method_index,
+ adrp_label,
+ &pc_relative_method_patches_);
}
vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
@@ -4606,6 +4655,14 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ vixl::aarch64::Label* adrp_label) {
+ return
+ NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
+}
+
vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
const DexFile& dex_file,
uint32_t element_offset,
@@ -4632,20 +4689,6 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
return label;
}
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
- const DexFile& dex_file, dex::StringIndex string_index) {
- return boot_image_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
- const DexFile& dex_file, dex::TypeIndex type_index) {
- return boot_image_type_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
-}
-
vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
uint64_t address) {
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
@@ -4712,11 +4755,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- boot_image_string_patches_.size() +
- pc_relative_string_patches_.size() +
- boot_image_type_patches_.size() +
+ pc_relative_method_patches_.size() +
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
+ pc_relative_string_patches_.size() +
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
@@ -4725,32 +4767,21 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
info.pc_insn_label->GetLocation(),
info.offset_or_index));
}
- for (const auto& entry : boot_image_string_patches_) {
- const StringReference& target_string = entry.first;
- vixl::aarch64::Literal<uint32_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
- target_string.dex_file,
- target_string.string_index.index_));
- }
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
linker_patches);
- } else {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
linker_patches);
+ } else {
+ DCHECK(pc_relative_method_patches_.empty());
+ DCHECK(pc_relative_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
- for (const auto& entry : boot_image_type_patches_) {
- const TypeReference& target_type = entry.first;
- vixl::aarch64::Literal<uint32_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
- target_type.dex_file,
- target_type.type_index.index_));
- }
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
info.custom_data));
@@ -4771,14 +4802,6 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(u
[this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
}
-vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
- MethodReference target_method,
- MethodToLiteralMap* map) {
- return map->GetOrCreate(
- target_method,
- [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
-}
-
void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
// Explicit clinit checks triggered by static invokes must have been pruned by
// art::PrepareForRegisterAllocation.
@@ -4818,21 +4841,15 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- break;
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kRuntimeCall:
break;
}
return desired_class_load_kind;
@@ -4840,7 +4857,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
@@ -4885,7 +4902,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
// move.
void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -4914,11 +4931,6 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
- cls->GetTypeIndex()));
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
// Add ADRP with its PC-relative type patch.
@@ -4972,7 +4984,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -5020,21 +5032,15 @@ void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear A
HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadString::LoadKind::kBootImageAddress:
- break;
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kRuntimeCall:
break;
}
return desired_string_load_kind;
@@ -5043,7 +5049,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
- if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
} else {
@@ -5073,10 +5079,6 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
Location out_loc = load->GetLocations()->Out();
switch (load->GetLoadKind()) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
- load->GetStringIndex()));
- return; // No dex cache slow path.
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
// Add ADRP with its PC-relative String patch.
const DexFile& dex_file = load->GetDexFile();
@@ -5478,6 +5480,15 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -5929,9 +5940,9 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
!Runtime::Current()->UseJitCompilation()) {
// Note that we do not actually check the value of `GetIsGcMarking()`
// to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` the read barrier mark introspection entrypoint.
- // If `temp` is null, it means that `GetIsGcMarking()` is false, and
- // vice versa.
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
//
// We use link-time generated thunks for the slow path. That thunk
// checks the reference and jumps to the entrypoint if needed.
@@ -6055,24 +6066,24 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
!use_load_acquire &&
!Runtime::Current()->UseJitCompilation()) {
// Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` the read barrier mark introspection entrypoint.
- // If `temp` is null, it means that `GetIsGcMarking()` is false, and
- // vice versa.
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
// temp = Thread::Current()->pReadBarrierMarkIntrospection
- // lr = &return_address;
+ // lr = &gray_return_address;
// if (temp != nullptr) {
// goto field_thunk<holder_reg, base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> root = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6142,16 +6153,74 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+    //     goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data =
+ linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+ vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+ __ Add(temp.X(), obj.X(), Operand(data_offset));
+ EmissionCheckScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 instruction (4B) before the return address label; "
+                  "2 instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
// Array cells are never volatile variables, therefore array loads
// never use Load-Acquire instructions on ARM64.
const bool use_load_acquire = false;
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
@@ -6171,8 +6240,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
size_t scale_factor,
Register temp,
bool needs_null_check,
- bool use_load_acquire,
- bool always_update_field) {
+ bool use_load_acquire) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// If we are emitting an array load, we should not be using a
@@ -6209,41 +6277,18 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// entrypoint will already be loaded in `temp2`.
Register temp2 = lr;
Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path;
- if (always_update_field) {
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
- // only supports address of the form `obj + field_offset`, where
- // `obj` is a register and `field_offset` is a register. Thus
- // `offset` and `scale_factor` above are expected to be null in
- // this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, 0u); /* "times 1" */
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
- } else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
- }
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
AddSlowPath(slow_path);
// temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -6255,12 +6300,83 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ Cbnz(temp2, slow_path->GetEntryLabel());
- // Fast path: just load the reference.
+ // Fast path: the GC is not marking: just load the reference.
GenerateRawReferenceLoad(
instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
__ Bind(slow_path->GetExitLabel());
}
+void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ // If we are emitting an array load, we should not be using a
+ // Load Acquire instruction. In other words:
+ // `instruction->IsArrayGet()` => `!use_load_acquire`.
+ DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
+
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to update the reference
+ // field within `obj`. Then, in the slow path, check the gray bit
+ // in the lock word of the reference's holder (`obj`) to decide
+ // whether to mark `ref` and update the field or not.
+ //
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+ // }
+
+ // Slow path updating the object reference at address `obj + field_offset`
+ // when the GC is marking. The entrypoint will already be loaded in `temp2`.
+ Register temp2 = lr;
+ Location temp2_loc = LocationFrom(temp2);
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ 0u /* "times 1" */,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Cbnz(temp2, slow_path->GetEntryLabel());
+ // Fast path: the GC is not marking: nothing to do (the field is
+ // up-to-date, and we don't need to load the reference).
+ __ Bind(slow_path->GetExitLabel());
+}
+
void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
Location ref,
Register obj,
@@ -6436,14 +6552,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 332ab49153..747fc9f0b1 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -25,8 +25,8 @@
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "string_reference.h"
+#include "type_reference.h"
#include "utils/arm64/assembler_arm64.h"
-#include "utils/type_reference.h"
// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
@@ -318,12 +318,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
- vixl::aarch64::MemOperand CreateVecMemRegisters(
+ vixl::aarch64::MemOperand VecAddress(
HVecMemoryOperation* instruction,
- Location* reg_loc,
- bool is_load,
// This function may acquire a scratch register.
- vixl::aarch64::UseScratchRegisterScope* temps_scope);
+ vixl::aarch64::UseScratchRegisterScope* temps_scope,
+ size_t size,
+ bool is_string_char_at,
+ /*out*/ vixl::aarch64::Register* scratch);
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
@@ -539,7 +540,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
HInvokeStaticOrDirect* invoke) OVERRIDE;
- Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -548,12 +548,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
UNIMPLEMENTED(FATAL);
}
- // Add a new PC-relative string patch for an instruction and return the label
+ // Add a new PC-relative method patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
- vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
- dex::StringIndex string_index,
+ vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method,
vixl::aarch64::Label* adrp_label = nullptr);
// Add a new PC-relative type patch for an instruction and return the label
@@ -572,6 +571,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
dex::TypeIndex type_index,
vixl::aarch64::Label* adrp_label = nullptr);
+ // Add a new PC-relative string patch for an instruction and return the label
+ // to be bound before the instruction. The instruction will be either the
+ // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+ // to the associated ADRP patch label).
+ vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
// Add a new PC-relative dex cache array patch for an instruction and return
// the label to be bound before the instruction. The instruction will be
// either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
@@ -585,11 +592,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
// before the CBNZ instruction.
vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
- vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(
- const DexFile& dex_file,
- dex::StringIndex string_index);
- vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
- dex::TypeIndex type_index);
vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
@@ -634,9 +636,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch64::Register obj,
@@ -645,8 +644,27 @@ class CodeGeneratorARM64 : public CodeGenerator {
size_t scale_factor,
vixl::aarch64::Register temp,
bool needs_null_check,
- bool use_load_acquire,
- bool always_update_field = false);
+ bool use_load_acquire);
+
+  // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is only used as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ Location field_offset,
+ vixl::aarch64::Register temp,
+ bool needs_null_check,
+ bool use_load_acquire);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -714,9 +732,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
private:
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
- using MethodToLiteralMap = ArenaSafeMap<MethodReference,
- vixl::aarch64::Literal<uint64_t>*,
- MethodReferenceComparator>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
vixl::aarch64::Literal<uint32_t>*,
StringReferenceValueComparator>;
@@ -727,8 +742,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
Uint32ToLiteralMap* map);
vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
- vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
- MethodToLiteralMap* map);
// The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
// and boot image strings/types. The only difference is the interpretation of the
@@ -780,16 +793,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
Uint64ToLiteralMap uint64_literals_;
// PC-relative DexCache access info.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
- StringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
- ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
- // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
- TypeToLiteralMap boot_image_type_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index c37cc52a2b..93cbc3b17c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm_vixl.h"
+#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -24,6 +25,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics_arm_vixl.h"
+#include "linker/arm/relative_patcher_thumb2.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -77,6 +79,20 @@ static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// A reference load (except an object array load) uses LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4;
+
#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif
@@ -88,6 +104,60 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
+static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
+ HInstruction* instruction) {
+ DCHECK(temps->IsAvailable(ip));
+ temps->Exclude(ip);
+ DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
+ DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
+ ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
+ __ bind(patch_label);
+ vixl32::Label placeholder_label;
+ __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
+ __ bind(&placeholder_label);
+}
+
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+ return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
+class EmitAdrCode {
+ public:
+ EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
+ : assembler_(assembler), rd_(rd), label_(label) {
+ ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+ adr_location_ = assembler->GetCursorOffset();
+ assembler->adr(EncodingSize(Wide), rd, label);
+ }
+
+ ~EmitAdrCode() {
+ DCHECK(label_->IsBound());
+ // The ADR emitted by the assembler does not set the Thumb mode bit we need.
+ // TODO: Maybe extend VIXL to allow ADR for return address?
+ uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
+ // Expecting ADR encoding T3 with `(offset & 1) == 0`.
+ DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26.
+ DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23.
+ DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15.
+ DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`.
+ // Add the Thumb mode bit.
+ raw_adr[2] |= 0x01u;
+ }
+
+ private:
+ ArmVIXLMacroAssembler* const assembler_;
+ vixl32::Register rd_;
+ vixl32::Label* const label_;
+ int32_t adr_location_;
+};
+
 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers;
 // for each live D register they treat the two corresponding S registers as live ones.
//
@@ -851,7 +921,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS
// Baker's read barriers, we need to perform the load of
// mirror::Object::monitor_ *before* the original reference load.
// This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // The slow path (for Baker's algorithm) should look like:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -993,6 +1063,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
__ Bind(GetEntryLabel());
+ // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
// /* int32_t */ monitor = obj->monitor_
@@ -1693,6 +1775,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege
}
}
+static int64_t AdjustConstantForCondition(int64_t value,
+ IfCondition* condition,
+ IfCondition* opposite) {
+ if (value == 1) {
+ if (*condition == kCondB) {
+ value = 0;
+ *condition = kCondEQ;
+ *opposite = kCondNE;
+ } else if (*condition == kCondAE) {
+ value = 0;
+ *condition = kCondNE;
+ *opposite = kCondEQ;
+ }
+ } else if (value == -1) {
+ if (*condition == kCondGT) {
+ value = 0;
+ *condition = kCondGE;
+ *opposite = kCondLT;
+ } else if (*condition == kCondLE) {
+ value = 0;
+ *condition = kCondLT;
+ *opposite = kCondGE;
+ }
+ }
+
+ return value;
+}
+
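// Editor's note: the sketch below is not part of this patch. It is a minimal,
// self-contained illustration (plain C++, no ART types) of the comparison
// rewrites that AdjustConstantForCondition() above relies on: unsigned
// `x < 1` / `x >= 1` become tests against zero, and signed `x > -1` /
// `x <= -1` become sign tests.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t values[] = {INT64_MIN, -2, -1, 0, 1, 2, INT64_MAX};
  for (int64_t x : values) {
    uint64_t ux = static_cast<uint64_t>(x);
    assert((ux < 1u) == (x == 0));   // kCondB  vs 1  ->  kCondEQ vs 0
    assert((ux >= 1u) == (x != 0));  // kCondAE vs 1  ->  kCondNE vs 0
    assert((x > -1) == (x >= 0));    // kCondGT vs -1 ->  kCondGE vs 0
    assert((x <= -1) == (x < 0));    // kCondLE vs -1 ->  kCondLT vs 0
  }
  return 0;
}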
static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
HCondition* condition,
bool invert,
@@ -1715,7 +1825,37 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
const vixl32::Register left_high = HighRegisterFrom(left);
const vixl32::Register left_low = LowRegisterFrom(left);
- int64_t value = Int64ConstantFrom(right);
+ int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
+ UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+ // Comparisons against 0 are common enough to deserve special attention.
+ if (value == 0) {
+ switch (cond) {
+ case kCondNE:
+ // x > 0 iff x != 0 when the comparison is unsigned.
+ case kCondA:
+ ret = std::make_pair(ne, eq);
+ FALLTHROUGH_INTENDED;
+ case kCondEQ:
+ // x <= 0 iff x == 0 when the comparison is unsigned.
+ case kCondBE:
+ __ Orrs(temps.Acquire(), left_low, left_high);
+ return ret;
+ case kCondLT:
+ case kCondGE:
+ __ Cmp(left_high, 0);
+ return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ // Trivially true or false.
+ case kCondB:
+ ret = std::make_pair(ne, eq);
+ FALLTHROUGH_INTENDED;
+ case kCondAE:
+ __ Cmp(left_low, left_low);
+ return ret;
+ default:
+ break;
+ }
+ }
switch (cond) {
case kCondEQ:
@@ -1760,8 +1900,6 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
FALLTHROUGH_INTENDED;
case kCondGE:
case kCondLT: {
- UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
-
__ Cmp(left_low, Low32Bits(value));
__ Sbcs(temps.Acquire(), left_high, High32Bits(value));
ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
@@ -1879,18 +2017,22 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition*
static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
const LocationSummary* const locations = condition->GetLocations();
- const IfCondition c = condition->GetCondition();
if (locations->InAt(1).IsConstant()) {
- const int64_t value = Int64ConstantFrom(locations->InAt(1));
+ IfCondition c = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+ const int64_t value =
+ AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite);
if (c < kCondLT || c > kCondGE) {
// Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
// we check that the least significant half of the first input to be compared
// is in a low register (the other half is read outside an IT block), and
// the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used.
- if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+ // encoding can be used; 0 is always handled, no matter what registers are
+ // used by the first input.
+ if (value != 0 &&
+ (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) {
return false;
}
// TODO(VIXL): The rest of the checks are there to keep the backend in sync with
@@ -1909,6 +2051,354 @@ static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler)
return true;
}
+static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+ DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
+
+ const vixl32::Register out = OutputRegister(cond);
+ const auto condition = GenerateTest(cond, false, codegen);
+
+ __ Mov(LeaveFlags, out, 0);
+
+ if (out.IsLow()) {
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(condition.first);
+ __ mov(condition.first, out, 1);
+ } else {
+ vixl32::Label done_label;
+ vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+
+ __ B(condition.second, final_label, /* far_target */ false);
+ __ Mov(out, 1);
+
+ if (done_label.IsReferenced()) {
+ __ Bind(&done_label);
+ }
+ }
+}
+
+static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+ DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = cond->GetLocations();
+ IfCondition condition = cond->GetCondition();
+ const vixl32::Register out = OutputRegister(cond);
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+ vixl32::Register left_high = HighRegisterFrom(left);
+ vixl32::Register left_low = LowRegisterFrom(left);
+ vixl32::Register temp;
+ UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+ if (right.IsConstant()) {
+ IfCondition opposite = cond->GetOppositeCondition();
+ const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
+ &condition,
+ &opposite);
+ Operand right_high = High32Bits(value);
+ Operand right_low = Low32Bits(value);
+
+ // The output uses Location::kNoOutputOverlap.
+ if (out.Is(left_high)) {
+ std::swap(left_low, left_high);
+ std::swap(right_low, right_high);
+ }
+
+ __ Sub(out, left_low, right_low);
+ temp = temps.Acquire();
+ __ Sub(temp, left_high, right_high);
+ } else {
+ DCHECK(right.IsRegisterPair());
+ temp = temps.Acquire();
+ __ Sub(temp, left_high, HighRegisterFrom(right));
+ __ Sub(out, left_low, LowRegisterFrom(right));
+ }
+
+ // Need to check after calling AdjustConstantForCondition().
+ DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
+
+ if (condition == kCondNE && out.IsLow()) {
+ __ Orrs(out, out, temp);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(ne);
+ __ mov(ne, out, 1);
+ } else {
+ __ Orr(out, out, temp);
+ codegen->GenerateConditionWithZero(condition, out, out, temp);
+ }
+}
+
+static void GenerateLongComparesAndJumps(HCondition* cond,
+ vixl32::Label* true_label,
+ vixl32::Label* false_label,
+ CodeGeneratorARMVIXL* codegen,
+ bool is_far_target = true) {
+ LocationSummary* locations = cond->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+ IfCondition if_cond = cond->GetCondition();
+
+ vixl32::Register left_high = HighRegisterFrom(left);
+ vixl32::Register left_low = LowRegisterFrom(left);
+ IfCondition true_high_cond = if_cond;
+ IfCondition false_high_cond = cond->GetOppositeCondition();
+ vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
+
+ // Set the conditions for the test, remembering that == needs to be
+ // decided using the low words.
+ switch (if_cond) {
+ case kCondEQ:
+ case kCondNE:
+ // Nothing to do.
+ break;
+ case kCondLT:
+ false_high_cond = kCondGT;
+ break;
+ case kCondLE:
+ true_high_cond = kCondLT;
+ break;
+ case kCondGT:
+ false_high_cond = kCondLT;
+ break;
+ case kCondGE:
+ true_high_cond = kCondGT;
+ break;
+ case kCondB:
+ false_high_cond = kCondA;
+ break;
+ case kCondBE:
+ true_high_cond = kCondB;
+ break;
+ case kCondA:
+ false_high_cond = kCondB;
+ break;
+ case kCondAE:
+ true_high_cond = kCondA;
+ break;
+ }
+ if (right.IsConstant()) {
+ int64_t value = Int64ConstantFrom(right);
+ int32_t val_low = Low32Bits(value);
+ int32_t val_high = High32Bits(value);
+
+ __ Cmp(left_high, val_high);
+ if (if_cond == kCondNE) {
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+ } else if (if_cond == kCondEQ) {
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+ } else {
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+ }
+ // Must be equal high, so compare the lows.
+ __ Cmp(left_low, val_low);
+ } else {
+ vixl32::Register right_high = HighRegisterFrom(right);
+ vixl32::Register right_low = LowRegisterFrom(right);
+
+ __ Cmp(left_high, right_high);
+ if (if_cond == kCondNE) {
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+ } else if (if_cond == kCondEQ) {
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+ } else {
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+ }
+ // Must be equal high, so compare the lows.
+ __ Cmp(left_low, right_low);
+ }
+ // The last comparison might be unsigned.
+ // TODO: optimize cases where this is always true/false
+ __ B(final_condition, true_label, is_far_target);
+}
+
+static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+ DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = cond->GetLocations();
+ IfCondition condition = cond->GetCondition();
+ const vixl32::Register out = OutputRegister(cond);
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+
+ if (right.IsConstant()) {
+ IfCondition opposite = cond->GetOppositeCondition();
+
+ // Comparisons against 0 are common enough to deserve special attention.
+ if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
+ switch (condition) {
+ case kCondNE:
+ case kCondA:
+ if (out.IsLow()) {
+ // We only care if both input registers are 0 or not.
+ __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(ne);
+ __ mov(ne, out, 1);
+ return;
+ }
+
+ FALLTHROUGH_INTENDED;
+ case kCondEQ:
+ case kCondBE:
+ // We only care if both input registers are 0 or not.
+ __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
+ codegen->GenerateConditionWithZero(condition, out, out);
+ return;
+ case kCondLT:
+ case kCondGE:
+ // We only care about the sign bit.
+ FALLTHROUGH_INTENDED;
+ case kCondAE:
+ case kCondB:
+ codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
+ return;
+ case kCondLE:
+ case kCondGT:
+ default:
+ break;
+ }
+ }
+ }
+
+ if ((condition == kCondEQ || condition == kCondNE) &&
+ // If `out` is a low register, then the GenerateConditionGeneric()
+ // function generates a shorter code sequence that is still branchless.
+ (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+ GenerateEqualLong(cond, codegen);
+ return;
+ }
+
+ if (CanGenerateTest(cond, codegen->GetAssembler())) {
+ GenerateConditionGeneric(cond, codegen);
+ return;
+ }
+
+ // Convert the jumps into the result.
+ vixl32::Label done_label;
+ vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+ vixl32::Label true_label, false_label;
+
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false);
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ Mov(out, 0);
+ __ B(final_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ Mov(out, 1);
+
+ if (done_label.IsReferenced()) {
+ __ Bind(&done_label);
+ }
+}
+
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
+                                                    CodeGeneratorARMVIXL* codegen) {
+ const Primitive::Type type = cond->GetLeft()->GetType();
+
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ if (type == Primitive::kPrimLong) {
+ GenerateConditionLong(cond, codegen);
+ return;
+ }
+
+ IfCondition condition = cond->GetCondition();
+ vixl32::Register in = InputRegisterAt(cond, 0);
+ const vixl32::Register out = OutputRegister(cond);
+ const Location right = cond->GetLocations()->InAt(1);
+ int64_t value;
+
+ if (right.IsConstant()) {
+ IfCondition opposite = cond->GetOppositeCondition();
+
+ value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
+
+ // Comparisons against 0 are common enough to deserve special attention.
+ if (value == 0) {
+ switch (condition) {
+ case kCondNE:
+ case kCondA:
+ if (out.IsLow() && out.Is(in)) {
+ __ Cmp(out, 0);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(ne);
+ __ mov(ne, out, 1);
+ return;
+ }
+
+ FALLTHROUGH_INTENDED;
+ case kCondEQ:
+ case kCondBE:
+ case kCondLT:
+ case kCondGE:
+ case kCondAE:
+ case kCondB:
+ codegen->GenerateConditionWithZero(condition, out, in);
+ return;
+ case kCondLE:
+ case kCondGT:
+ default:
+ break;
+ }
+ }
+ }
+
+ if (condition == kCondEQ || condition == kCondNE) {
+ Operand operand(0);
+
+ if (right.IsConstant()) {
+ operand = Operand::From(value);
+ } else if (out.Is(RegisterFrom(right))) {
+ // Avoid 32-bit instructions if possible.
+ operand = InputOperandAt(cond, 0);
+ in = RegisterFrom(right);
+ } else {
+ operand = InputOperandAt(cond, 1);
+ }
+
+ if (condition == kCondNE && out.IsLow()) {
+ __ Subs(out, in, operand);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(ne);
+ __ mov(ne, out, 1);
+ } else {
+ __ Sub(out, in, operand);
+ codegen->GenerateConditionWithZero(condition, out, out);
+ }
+
+ return;
+ }
+
+ GenerateConditionGeneric(cond, codegen);
+}
+
static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
const Primitive::Type type = constant->GetType();
bool ret = false;
@@ -2011,13 +2501,11 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
uint32_literals_(std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_string_patches_(StringReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_type_patches_(TypeReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2468,92 +2956,10 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
- vixl32::Label* true_label,
- vixl32::Label* false_label) {
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- IfCondition if_cond = cond->GetCondition();
-
- vixl32::Register left_high = HighRegisterFrom(left);
- vixl32::Register left_low = LowRegisterFrom(left);
- IfCondition true_high_cond = if_cond;
- IfCondition false_high_cond = cond->GetOppositeCondition();
- vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
-
- // Set the conditions for the test, remembering that == needs to be
- // decided using the low words.
- switch (if_cond) {
- case kCondEQ:
- case kCondNE:
- // Nothing to do.
- break;
- case kCondLT:
- false_high_cond = kCondGT;
- break;
- case kCondLE:
- true_high_cond = kCondLT;
- break;
- case kCondGT:
- false_high_cond = kCondLT;
- break;
- case kCondGE:
- true_high_cond = kCondGT;
- break;
- case kCondB:
- false_high_cond = kCondA;
- break;
- case kCondBE:
- true_high_cond = kCondB;
- break;
- case kCondA:
- false_high_cond = kCondB;
- break;
- case kCondAE:
- true_high_cond = kCondA;
- break;
- }
- if (right.IsConstant()) {
- int64_t value = Int64ConstantFrom(right);
- int32_t val_low = Low32Bits(value);
- int32_t val_high = High32Bits(value);
-
- __ Cmp(left_high, val_high);
- if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label);
- } else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label);
- } else {
- __ B(ARMCondition(true_high_cond), true_label);
- __ B(ARMCondition(false_high_cond), false_label);
- }
- // Must be equal high, so compare the lows.
- __ Cmp(left_low, val_low);
- } else {
- vixl32::Register right_high = HighRegisterFrom(right);
- vixl32::Register right_low = LowRegisterFrom(right);
-
- __ Cmp(left_high, right_high);
- if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label);
- } else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label);
- } else {
- __ B(ARMCondition(true_high_cond), true_label);
- __ B(ARMCondition(false_high_cond), false_label);
- }
- // Must be equal high, so compare the lows.
- __ Cmp(left_low, right_low);
- }
- // The last comparison might be unsigned.
- // TODO: optimize cases where this is always true/false
- __ B(final_condition, true_label);
-}
-
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target_in,
- vixl32::Label* false_target_in) {
+ vixl32::Label* false_target_in,
+ bool is_far_target) {
if (CanGenerateTest(condition, codegen_->GetAssembler())) {
vixl32::Label* non_fallthrough_target;
bool invert;
@@ -2575,7 +2981,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c
const auto cond = GenerateTest(condition, invert, codegen_);
- __ B(cond.first, non_fallthrough_target);
+ __ B(cond.first, non_fallthrough_target, is_far_target);
if (emit_both_branches) {
// No target falls through, we need to branch.
@@ -2592,7 +2998,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c
vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
- GenerateLongComparesAndJumps(condition, true_target, false_target);
+ GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target);
if (false_target != &fallthrough) {
__ B(false_target);
@@ -2660,7 +3066,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru
// the HCondition, generate the comparison directly.
Primitive::Type type = condition->InputAt(0)->GetType();
if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
- GenerateCompareTestAndBranch(condition, true_target, false_target);
+ GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
return;
}
@@ -2679,14 +3085,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru
if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
if (arm_cond.Is(eq)) {
- __ CompareAndBranchIfZero(left, non_fallthrough_target);
+ __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
} else {
DCHECK(arm_cond.Is(ne));
- __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+ __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
}
} else {
__ Cmp(left, right);
- __ B(arm_cond, non_fallthrough_target);
+ __ B(arm_cond, non_fallthrough_target, far_target);
}
}
@@ -2903,6 +3309,83 @@ void CodeGeneratorARMVIXL::GenerateNop() {
__ Nop();
}
+// `temp` is an extra temporary register that is used for some conditions;
+// callers may not specify it, in which case the method will use a scratch
+// register instead.
+void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
+ vixl32::Register out,
+ vixl32::Register in,
+ vixl32::Register temp) {
+ switch (condition) {
+ case kCondEQ:
+ // x <= 0 iff x == 0 when the comparison is unsigned.
+ case kCondBE:
+ if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
+ temp = out;
+ }
+
+ // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
+ // different as well.
+ if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
+ // temp = - in; only 0 sets the carry flag.
+ __ Rsbs(temp, in, 0);
+
+ if (out.Is(in)) {
+ std::swap(in, temp);
+ }
+
+ // out = - in + in + carry = carry
+ __ Adc(out, temp, in);
+ } else {
+ // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
+ __ Clz(out, in);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
+ }
+
+ break;
+ case kCondNE:
+ // x > 0 iff x != 0 when the comparison is unsigned.
+ case kCondA: {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+
+ if (out.Is(in)) {
+ if (!temp.IsValid() || in.Is(temp)) {
+ temp = temps.Acquire();
+ }
+ } else if (!temp.IsValid() || !temp.IsLow()) {
+ temp = out;
+ }
+
+ // temp = in - 1; only 0 does not set the carry flag.
+ __ Subs(temp, in, 1);
+ // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
+ __ Sbc(out, in, temp);
+ break;
+ }
+ case kCondGE:
+ __ Mvn(out, in);
+ in = out;
+ FALLTHROUGH_INTENDED;
+ case kCondLT:
+ // We only care about the sign bit.
+ __ Lsr(out, in, 31);
+ break;
+ case kCondAE:
+ // Trivially true.
+ __ Mov(out, 1);
+ break;
+ case kCondB:
+ // Trivially false.
+ __ Mov(out, 0);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected condition " << condition;
+ UNREACHABLE();
+ }
+}
+
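// Editor's note: the sketch below is not part of this patch. It is a minimal
// model (under the stated assumptions) of the two carry-flag tricks used by
// GenerateConditionWithZero() above for the EQ- and NE-with-zero cases. The
// ARM carry flag of a 32-bit subtraction a - b is modeled as `a >= b`
// (carry = NOT borrow); the helper name is illustrative only.
#include <cassert>
#include <cstdint>

static uint32_t CarryOfSub(uint32_t a, uint32_t b) {
  // SUBS/RSBS set C = 1 iff no borrow occurs, i.e. iff a >= b (unsigned).
  return (a >= b) ? 1u : 0u;
}

int main() {
  const uint32_t inputs[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t in : inputs) {
    // kCondEQ path: RSBS temp, in, #0  then  ADC out, temp, in.
    // Only in == 0 leaves the carry set, so out = carry = (in == 0).
    uint32_t carry = CarryOfSub(0u, in);
    uint32_t temp = 0u - in;
    uint32_t out_eq = temp + in + carry;
    assert(out_eq == static_cast<uint32_t>(in == 0));

    // kCondNE path: SUBS temp, in, #1  then  SBC out, in, temp.
    // Only in == 0 clears the carry, so out = carry = (in != 0).
    carry = CarryOfSub(in, 1u);
    temp = in - 1u;
    uint32_t out_ne = in - temp - (1u - carry);
    assert(out_ne == static_cast<uint32_t>(in != 0));
  }
  return 0;
}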
void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -2939,52 +3422,47 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
return;
}
- const vixl32::Register out = OutputRegister(cond);
+ const Primitive::Type type = cond->GetLeft()->GetType();
- if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
- const auto condition = GenerateTest(cond, false, codegen_);
- // We use the scope because of the IT block that follows.
- ExactAssemblyScope guard(GetVIXLAssembler(),
- 4 * vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
-
- __ it(condition.first);
- __ mov(condition.first, out, 1);
- __ it(condition.second);
- __ mov(condition.second, out, 0);
+ if (Primitive::IsFloatingPointType(type)) {
+ GenerateConditionGeneric(cond, codegen_);
return;
}
- // Convert the jumps into the result.
- vixl32::Label done_label;
- vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
- if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
- vixl32::Label true_label, false_label;
+ const IfCondition condition = cond->GetCondition();
- GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+  // A condition with only one boolean input, or with two boolean inputs that is not an equality
+  // or inequality, results from transformations done by the instruction simplifier and is
+  // handled as a regular condition with integral inputs.
+ if (type == Primitive::kPrimBoolean &&
+ cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+ (condition == kCondEQ || condition == kCondNE)) {
+ vixl32::Register left = InputRegisterAt(cond, 0);
+ const vixl32::Register out = OutputRegister(cond);
+ const Location right_loc = cond->GetLocations()->InAt(1);
- // False case: result = 0.
- __ Bind(&false_label);
- __ Mov(out, 0);
- __ B(final_label);
+ // The constant case is handled by the instruction simplifier.
+ DCHECK(!right_loc.IsConstant());
- // True case: result = 1.
- __ Bind(&true_label);
- __ Mov(out, 1);
- } else {
- DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+ vixl32::Register right = RegisterFrom(right_loc);
- const auto condition = GenerateTest(cond, false, codegen_);
+ // Avoid 32-bit instructions if possible.
+ if (out.Is(right)) {
+ std::swap(left, right);
+ }
- __ Mov(LeaveFlags, out, 0);
- __ B(condition.second, final_label, /* far_target */ false);
- __ Mov(out, 1);
- }
+ __ Eor(out, left, right);
- if (done_label.IsReferenced()) {
- __ Bind(&done_label);
+ if (condition == kCondEQ) {
+ __ Eor(out, out, 1);
+ }
+
+ return;
}
+
+ GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
}
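The boolean fast path above needs no flags at all: for inputs restricted to 0 and 1, inequality is simply XOR, and equality is that XOR flipped with an extra `EOR out, out, #1`. A minimal host-side illustration in plain C++ (no ART types):

#include <cassert>
#include <cstdint>

int main() {
  // For inputs restricted to {0, 1}, EOR implements kCondNE and
  // EOR followed by EOR with 1 implements kCondEQ, as in the fast path above.
  for (uint32_t left = 0; left <= 1; ++left) {
    for (uint32_t right = 0; right <= 1; ++right) {
      uint32_t ne = left ^ right;         // kCondNE
      uint32_t eq = (left ^ right) ^ 1u;  // kCondEQ
      assert(ne == static_cast<uint32_t>(left != right));
      assert(eq == static_cast<uint32_t>(left == right));
    }
  }
  return 0;
}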
void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -3119,6 +3597,15 @@ void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
// Will be generated at use site.
}
+void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -5296,7 +5783,18 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5763,11 +6261,35 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
- // Also need for String compression feature.
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
- || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Also need a temporary for String compression feature.
locations->AddTemp(Location::RequiresRegister());
}
}
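The temp-allocation decision above for constant-index reference array loads boils down to one comparison: the effective offset is the array data offset plus `index << 2` (ComponentSizeShift of kPrimNot), and an extra general-purpose temporary is only needed once that offset reaches kReferenceLoadMinFarOffset. A small sketch of that decision; the two constants are placeholders, since the real values come from mirror::Array and the ARM code generator, not from this sketch.

#include <cstdint>
#include <iostream>

constexpr uint32_t kFakeArrayDataOffset = 12u;              // Placeholder, not ART's real value.
constexpr uint32_t kFakeReferenceLoadMinFarOffset = 4096u;  // Placeholder threshold.

bool NeedsExtraTempForConstantIndexLoad(uint32_t index) {
  // offset = data_offset + (index << ComponentSizeShift(kPrimNot)), i.e. index * 4.
  uint32_t offset = kFakeArrayDataOffset + (index << 2);
  // The reserved entrypoint register is always added separately; the extra
  // temporary is only needed when the offset no longer fits the "near" form
  // used by the field-load thunk.
  return offset >= kFakeReferenceLoadMinFarOffset;
}

int main() {
  std::cout << NeedsExtraTempForConstantIndexLoad(1) << "\n"        // Small index: no extra temp.
            << NeedsExtraTempForConstantIndexLoad(100000) << "\n";  // Large index: extra temp.
  return 0;
}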
@@ -5878,8 +6400,20 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
} else {
vixl32::Register out = OutputRegister(instruction);
if (index.IsConstant()) {
@@ -6315,6 +6849,16 @@ void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddr
}
}
+void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConventionARMVIXL calling_convention;
@@ -6707,21 +7251,15 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- break;
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kRuntimeCall:
break;
}
return desired_class_load_kind;
@@ -6729,7 +7267,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConventionARMVIXL calling_convention;
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
@@ -6769,13 +7307,20 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
// For non-Baker read barrier we have a temp-clobbering call.
}
}
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -6802,13 +7347,6 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
- cls->GetTypeIndex()));
- break;
- }
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -6844,7 +7382,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -6905,21 +7443,15 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadString::LoadKind::kBootImageAddress:
- break;
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kRuntimeCall:
break;
}
return desired_string_load_kind;
@@ -6929,7 +7461,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
locations->SetOut(LocationFrom(r0));
} else {
locations->SetOut(Location::RequiresRegister());
@@ -6945,6 +7477,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
    // that the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -6961,11 +7496,6 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
HLoadString::LoadKind load_kind = load->GetLoadKind();
switch (load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
- __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
- load->GetStringIndex()));
- return; // No dex cache slow path.
- }
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
@@ -7009,7 +7539,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
}
  // TODO: Re-add the compiler code to do the string dex cache lookup.
- DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+ DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
__ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
@@ -7107,6 +7637,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -8005,48 +8538,98 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+ root_reg.GetCode(), narrow);
+ vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(),
+ 4 * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
+ EmitPlaceholderBne(codegen_, bne_label);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
+ } else {
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
+
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+      // The entrypoint is null when the GC is not marking; this avoids one load compared to
+      // checking GetIsGcMarking().
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -8064,6 +8647,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
}
}
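Both branches of the GC root load above rely on the same trick: instead of reading `Thread::GetIsGcMarking()`, the generated code loads a mark entrypoint from the thread and treats a null value as "not marking". A rough C++ model of that protocol, with hypothetical types and names, purely illustrative:

#include <cstddef>

// Illustrative-only model of the "entrypoint doubles as the marking flag"
// protocol used for GC root loads. None of these types are ART's.
using Obj = void*;
using MarkFn = Obj (*)(Obj);

struct FakeThread {
  // Null while the GC is not marking; set to the mark routine while it is.
  // This saves an explicit load of a separate is_gc_marking flag.
  MarkFn read_barrier_mark_entrypoint = nullptr;
};

Obj LoadGcRoot(FakeThread* self, Obj* root_address) {
  MarkFn entrypoint = self->read_barrier_mark_entrypoint;  // temp = pReadBarrierMark...
  Obj root = *root_address;                                // Original reference load.
  if (entrypoint != nullptr) {                             // <=> GetIsGcMarking().
    root = entrypoint(root);                               // Slow path: mark the root.
  }
  return root;
}

int main() {
  FakeThread thread;
  Obj dummy = nullptr;
  return LoadGcRoot(&thread, &dummy) == nullptr ? 0 : 1;
}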
+void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
+}
+
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
@@ -8073,6 +8666,85 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ DCHECK(!base.Is(kBakerCcEntrypointRegister));
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating a narrow-encoding LDR for the large
+      // offsets where `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size once the generated thunks are taken into account.
+ DCHECK(!narrow);
+ }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base.GetCode(), obj.GetCode(), narrow);
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Note: We need a specific width for the unpoisoning NEG.
+ if (kPoisonHeapReferences) {
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
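The offset split performed above for far field offsets is plain masking arithmetic: the base register absorbs the aligned high part of the offset, and the LDR keeps only the low bits, so the encodable immediate stays below kReferenceLoadMinFarOffset. A standalone illustration with a placeholder power-of-two threshold (the real constant is defined by the ARM code generator, not here):

#include <cassert>
#include <cstdint>

constexpr uint32_t kFakeMinFarOffset = 4096u;  // Placeholder threshold.
static_assert((kFakeMinFarOffset & (kFakeMinFarOffset - 1u)) == 0u, "Expecting a power of 2.");

struct SplitOffset {
  uint32_t base_adjustment;  // Added to the object register: offset & ~(threshold - 1).
  uint32_t ldr_offset;       // Kept in the LDR immediate: offset & (threshold - 1).
};

SplitOffset SplitFarOffset(uint32_t offset) {
  SplitOffset split;
  split.base_adjustment = offset & ~(kFakeMinFarOffset - 1u);
  split.ldr_offset = offset & (kFakeMinFarOffset - 1u);
  return split;
}

int main() {
  // E.g. a field at offset 0x1234 splits into base += 0x1000 and an LDR with #0x234.
  SplitOffset s = SplitFarOffset(0x1234u);
  assert(s.base_adjustment == 0x1000u);
  assert(s.ldr_offset == 0x234u);
  assert(s.base_adjustment + s.ldr_offset == 0x1234u);
  assert(s.ldr_offset < kFakeMinFarOffset);
  return 0;
}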
@@ -8093,9 +8765,73 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer.
+ DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+  //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+ __ Add(data_reg, obj, Operand(data_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
@@ -8107,9 +8843,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field,
- vixl32::Register* temp2) {
+ bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -8120,6 +8854,73 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
// not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
+
+ vixl32::Register temp_reg = RegisterFrom(temp);
+
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Location temp2 = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp2);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
+  // The entrypoint is null when the GC is not marking; this avoids one load compared to
+  // checking GetIsGcMarking().
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel());
+  // Fast path: the GC is not marking, so just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ vixl32::Register temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to update the reference
+ // field within `obj`. Then, in the slow path, check the gray bit
+ // in the lock word of the reference's holder (`obj`) to decide
+ // whether to mark `ref` and update the field or not.
+ //
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
// corresponding to register `ref`. If `temp3` is null, it means
// that `GetIsGcMarking()` is false, and vice versa.
@@ -8129,55 +8930,32 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
// // Slow path.
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
+ // old_ref = ref;
// ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
// }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
vixl32::Register temp_reg = RegisterFrom(temp);
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp3`.
+ // Slow path updating the object reference at address `obj + field_offset`
+ // when the GC is marking. The entrypoint will already be loaded in `temp3`.
Location temp3 = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
- // only supports address of the form `obj + field_offset`, where
- // `obj` is a register and `field_offset` is a register pair (of
- // which only the lower half is used). Thus `offset` and
- // `scale_factor` above are expected to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- temp_reg,
- *temp2,
- /* entrypoint */ temp3);
- } else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp3);
- }
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ ScaleFactor::TIMES_1,
+ needs_null_check,
+ temp_reg,
+ temp2,
+ /* entrypoint */ temp3);
AddSlowPath(slow_path);
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8189,8 +8967,8 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
  // The entrypoint is null when the GC is not marking; this avoids one load compared to
  // checking GetIsGcMarking().
__ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+  // Fast path: the GC is not marking, so there is nothing to do (the field is
+  // up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
}
@@ -8348,7 +9126,7 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
return RegisterFrom(location);
}
-Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
HInvokeStaticOrDirect* invoke, Location temp) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
@@ -8362,6 +9140,13 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+ vixl32::Register temp_reg = RegisterFrom(temp);
+ EmitMovwMovtPlaceholder(labels, temp_reg);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
break;
@@ -8399,12 +9184,6 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
break;
}
}
- return callee_method;
-}
-
-void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -8478,9 +9257,11 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location
__ blx(lr);
}
-CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
- const DexFile& dex_file, dex::StringIndex string_index) {
- return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch(
+ MethodReference target_method) {
+ return NewPcRelativePatch(*target_method.dex_file,
+ target_method.dex_method_index,
+ &pc_relative_method_patches_);
}
CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
@@ -8493,6 +9274,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntry
return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
}
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
+ const DexFile& dex_file, dex::StringIndex string_index) {
+ return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
const DexFile& dex_file, uint32_t element_offset) {
return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -8504,34 +9290,15 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa
return &patches->back();
}
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral(
- const DexFile& dex_file,
- dex::StringIndex string_index) {
- return boot_image_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() {
- return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
- });
-}
-
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageTypeLiteral(
- const DexFile& dex_file,
- dex::TypeIndex type_index) {
- return boot_image_type_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() {
- return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
- });
+vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
}
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
- return DeduplicateUint32Literal(address, &uint32_literals_);
-}
-
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
const DexFile& dex_file,
dex::StringIndex string_index,
@@ -8581,43 +9348,32 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
DCHECK(linker_patches->empty());
size_t size =
/* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
- boot_image_string_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
- boot_image_type_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- for (const auto& entry : boot_image_string_patches_) {
- const StringReference& target_string = entry.first;
- VIXLUInt32Literal* literal = entry.second;
- DCHECK(literal->IsBound());
- uint32_t literal_offset = literal->GetLocation();
- linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
- target_string.dex_file,
- target_string.string_index.index_));
- }
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
linker_patches);
- } else {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
linker_patches);
+ } else {
+ DCHECK(pc_relative_method_patches_.empty());
+ DCHECK(pc_relative_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
- for (const auto& entry : boot_image_type_patches_) {
- const TypeReference& target_type = entry.first;
- VIXLUInt32Literal* literal = entry.second;
- DCHECK(literal->IsBound());
- uint32_t literal_offset = literal->GetLocation();
- linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
- target_type.dex_file,
- target_type.type_index.index_));
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+ info.custom_data));
}
DCHECK_EQ(size, linker_patches->size());
}
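The reserve size computed above is easy to sanity-check by hand: each PC-relative entry contributes two patches (one for the MOVW, one for the MOVT), while each Baker read barrier entry contributes a single branch patch. A quick illustration of the arithmetic with made-up counts:

#include <cassert>
#include <cstddef>

int main() {
  // Made-up patch counts, purely to exercise the size formula above.
  size_t dex_cache = 3, method = 2, type = 1, type_bss = 1, string = 2, baker = 4;
  // MOVW+MOVT for each PC-relative entry, one patch per Baker read barrier entry.
  size_t size = 2 * dex_cache + 2 * method + 2 * type + 2 * type_bss + 2 * string + baker;
  assert(size == 22u);
  return 0;
}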
@@ -8632,16 +9388,6 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
});
}
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodLiteral(
- MethodReference target_method,
- MethodToLiteralMap* map) {
- return map->GetOrCreate(
- target_method,
- [this]() {
- return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
- });
-}
-
void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -8855,14 +9601,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ VIXLUInt32Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ VIXLUInt32Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 1e9669dc38..f6e4de33a8 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -24,8 +24,8 @@
#include "nodes.h"
#include "string_reference.h"
#include "parallel_move_resolver.h"
+#include "type_reference.h"
#include "utils/arm/assembler_arm_vixl.h"
-#include "utils/type_reference.h"
// TODO(VIXL): make vixl clean wrt -Wshadow.
#pragma GCC diagnostic push
@@ -400,10 +400,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
bool far_target = true);
void GenerateCompareTestAndBranch(HCondition* condition,
vixl::aarch32::Label* true_target,
- vixl::aarch32::Label* false_target);
- void GenerateLongComparesAndJumps(HCondition* cond,
- vixl::aarch32::Label* true_label,
- vixl::aarch32::Label* false_label);
+ vixl::aarch32::Label* false_target,
+ bool is_far_target = true);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -540,7 +538,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
HInvokeStaticOrDirect* invoke) OVERRIDE;
- Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -566,18 +563,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Label add_pc_label;
};
- PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
- dex::StringIndex string_index);
+ PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+ PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
- VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index);
- VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
- dex::TypeIndex type_index);
+
+  // Add a new Baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+ vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
- VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
Handle<mirror::String> handle);
@@ -589,6 +587,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Maybe add the reserved entrypoint register as a temporary for field load. This temp
+ // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -612,11 +614,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`). This operation
- // requires an extra temporary register, which must be provided as a
- // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch32::Register obj,
@@ -624,9 +621,27 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field = false,
- vixl::aarch32::Register* temp2 = nullptr);
+ bool needs_null_check);
+
+  // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is only as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ vixl::aarch32::Register temp2);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -699,13 +714,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
vixl::aarch32::Register out);
+ // `temp` is an extra temporary register that is used for some conditions;
+  // callers may omit it, in which case the method will use a scratch
+ // register instead.
+ void GenerateConditionWithZero(IfCondition condition,
+ vixl::aarch32::Register out,
+ vixl::aarch32::Register in,
+ vixl::aarch32::Register temp = vixl32::Register());
+
private:
vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
vixl::aarch32::Register temp);
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>;
- using MethodToLiteralMap =
- ArenaSafeMap<MethodReference, VIXLUInt32Literal*, MethodReferenceComparator>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
VIXLUInt32Literal*,
StringReferenceValueComparator>;
@@ -713,9 +734,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
VIXLUInt32Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ vixl::aarch32::Label label;
+ uint32_t custom_data;
+ };
+
VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
- VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method,
- MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
ArenaDeque<PcRelativePatchInfo>* patches);
@@ -740,16 +766,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
Uint32ToLiteralMap uint32_literals_;
// PC-relative patch info for each HArmDexCacheArraysBase.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
- StringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
- ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
- // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
- TypeToLiteralMap boot_image_type_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index b7e602420c..951d75a708 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -219,15 +219,33 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
-
+ const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier);
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ const bool is_load_class_bss_entry =
+ (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
+ // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+ Register entry_address = kNoRegister;
+ if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0));
+ // In the unlucky case that `temp` is A0, we preserve the address in `out` across the
+ // kSaveEverything call.
+ entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (temp_is_a0) {
+ __ Move(entry_address, temp);
+ }
+ }
+
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
-
QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
: kQuickInitializeType;
mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
@@ -237,25 +255,27 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
}
+ // For HLoadClass/kBssEntry, store the resolved class to the BSS entry.
+ if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) {
+ // The class entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0);
+ }
+
// Move the class to the desired location.
- Location out = locations->Out();
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = instruction_->GetType();
- mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+ mips_codegen->MoveLocation(out,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ type);
}
-
RestoreLiveRegisters(codegen, locations);
- // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
- if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- DCHECK(out.IsValid());
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the .bss entry address in the fast path,
- // so that we can avoid another calculation here.
- bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+
+ // For HLoadClass/kBssEntry, store the resolved class to the BSS entry.
+ if (is_load_class_bss_entry && !r2_baker_or_no_read_barriers) {
+ // For non-Baker read barriers (or on R6), we need to re-calculate the address of
+ // the class entry.
Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
- DCHECK_NE(out.AsRegister<Register>(), AT);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
bool reordering = __ SetReorder(false);
@@ -286,40 +306,62 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ DCHECK(instruction_->IsLoadString());
+ DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ HLoadString* load = instruction_->AsLoadString();
+ const dex::StringIndex string_index = load->GetStringIndex();
+ Register out = locations->Out().AsRegister<Register>();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
-
+ const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier);
+ InvokeRuntimeCallingConvention calling_convention;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
- HLoadString* load = instruction_->AsLoadString();
- const dex::StringIndex string_index = load->GetStringIndex();
+ // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+ Register entry_address = kNoRegister;
+ if (r2_baker_or_no_read_barriers) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0));
+ // In the unlucky case that `temp` is A0, we preserve the address in `out` across the
+ // kSaveEverything call.
+ entry_address = temp_is_a0 ? out : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (temp_is_a0) {
+ __ Move(entry_address, temp);
+ }
+ }
+
__ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+ // Store the resolved string to the BSS entry.
+ if (r2_baker_or_no_read_barriers) {
+ // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0);
+ }
+
Primitive::Type type = instruction_->GetType();
mips_codegen->MoveLocation(locations->Out(),
- calling_convention.GetReturnLocation(type),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
type);
-
RestoreLiveRegisters(codegen, locations);
- // Store the resolved String to the BSS entry.
- // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
- // .bss entry address in the fast path, so that we can avoid another calculation here.
- bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
- Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
- DCHECK_NE(out, AT);
- CodeGeneratorMIPS::PcRelativePatchInfo* info =
- mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
- bool reordering = __ SetReorder(false);
- mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
- __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
-
+ // Store the resolved string to the BSS entry.
+ if (!r2_baker_or_no_read_barriers) {
+ // For non-Baker read barriers (or on R6), we need to re-calculate the address of
+ // the string entry.
+ Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ bool reordering = __ SetReorder(false);
+ mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+ __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ }
__ B(GetExitLabel());
}
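
The reworked slow paths above keep the .bss entry address alive in a spare register across the kSaveEverything runtime call (the R2-with-Baker-or-no-read-barriers case), so the object returned in A0 can be stored straight into the entry; otherwise the PC-relative address is re-derived after the call. A self-contained C++ sketch of that shape, using hypothetical names (ResolveStringRuntime, bss_string_table) rather than ART's API:

#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins for the runtime entrypoint and the .bss string table.
static std::unordered_map<uint32_t, const std::string*> bss_string_table;

static const std::string* ResolveStringRuntime(uint32_t string_index) {
  static const std::string resolved = "example";  // pretend resolution result
  (void)string_index;
  return &resolved;
}

// Fast path: load the cached entry; slow path: resolve, then fill the entry
// through the address that was kept live across the call (the role played by
// `entry_address` in the slow paths above).
static const std::string* LoadStringBssEntry(uint32_t string_index) {
  const std::string** entry_address = &bss_string_table[string_index];
  const std::string* value = *entry_address;     // GC-root load in the real code
  if (value == nullptr) {                        // Beqz -> slow path
    value = ResolveStringRuntime(string_index);  // kQuickResolveString
    *entry_address = value;                      // store to the preserved entry address
  }
  return value;
}

int main() {
  assert(LoadStringBssEntry(42) == LoadStringBssEntry(42));  // second call hits the cache
  return 0;
}
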
@@ -1019,13 +1061,10 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
uint32_literals_(std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_string_patches_(StringReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_type_patches_(TypeReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
clobbered_ra_(false) {
@@ -1564,50 +1603,36 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- pc_relative_string_patches_.size() +
+ pc_relative_method_patches_.size() +
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
- boot_image_string_patches_.size() +
- boot_image_type_patches_.size();
+ pc_relative_string_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
linker_patches);
- } else {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
linker_patches);
+ } else {
+ DCHECK(pc_relative_method_patches_.empty());
+ DCHECK(pc_relative_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
- for (const auto& entry : boot_image_string_patches_) {
- const StringReference& target_string = entry.first;
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
- linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
- target_string.dex_file,
- target_string.string_index.index_));
- }
- for (const auto& entry : boot_image_type_patches_) {
- const TypeReference& target_type = entry.first;
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
- linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
- target_type.dex_file,
- target_type.type_index.index_));
- }
DCHECK_EQ(size, linker_patches->size());
}
-CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
- const DexFile& dex_file, dex::StringIndex string_index) {
- return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch(
+ MethodReference target_method) {
+ return NewPcRelativePatch(*target_method.dex_file,
+ target_method.dex_method_index,
+ &pc_relative_method_patches_);
}
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
@@ -1620,6 +1645,11 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch(
return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
}
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+ const DexFile& dex_file, dex::StringIndex string_index) {
+ return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
const DexFile& dex_file, uint32_t element_offset) {
return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -1637,27 +1667,6 @@ Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLit
[this, value]() { return __ NewLiteral<uint32_t>(value); });
}
-Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
- MethodToLiteralMap* map) {
- return map->GetOrCreate(
- target_method,
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index) {
- return boot_image_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
- dex::TypeIndex type_index) {
- return boot_image_type_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
@@ -1665,6 +1674,7 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address)
void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
Register out,
Register base) {
+ DCHECK_NE(out, base);
if (GetInstructionSetFeatures().IsR6()) {
DCHECK_EQ(base, ZERO);
__ Bind(&info->high_label);
@@ -1724,31 +1734,32 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
DCHECK_EQ(code[literal_offset + 1], 0x12);
DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00);
DCHECK_EQ(code[literal_offset + 3], 0x3C);
- // lw reg, reg, addr32_low
+ // instr reg, reg, addr32_low
DCHECK_EQ(code[literal_offset + 4], 0x78);
DCHECK_EQ(code[literal_offset + 5], 0x56);
- DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C);
- addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "lw reg, reg, addr32_low".
+ addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low".
// lui reg, addr32_high
code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
- // lw reg, reg, addr32_low
+ // instr reg, reg, addr32_low
code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0);
code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8);
}
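
PatchJitRootUse above rewrites a lui/load pair in place and bumps the high half by `(addr32 & 0x8000) << 1`, because the 16-bit low immediate is sign-extended when the second instruction adds it to the lui result. A small arithmetic sketch of that split (plain C++, not the instruction encoder):

#include <cassert>
#include <cstdint>

// Split addr32 so that (high << 16) + signext16(low) == addr32, the invariant
// the patched "lui reg, high" / "instr reg, reg, low" pair relies on.
static void SplitHiLo(uint32_t addr32, uint16_t* high, uint16_t* low) {
  // If bit 15 of the low half is set, the low immediate sign-extends to a
  // negative value, so pre-add 0x10000 before taking the high half.
  uint32_t adjusted = addr32 + ((addr32 & 0x8000) << 1);
  *high = static_cast<uint16_t>(adjusted >> 16);
  *low = static_cast<uint16_t>(addr32);  // low 16 bits are unchanged by the adjustment
}

int main() {
  uint16_t high, low;
  SplitHiLo(0x1234ABCDu, &high, &low);
  uint32_t rebuilt = (static_cast<uint32_t>(high) << 16) +
                     static_cast<uint32_t>(static_cast<int32_t>(static_cast<int16_t>(low)));
  assert(rebuilt == 0x1234ABCDu);
  return 0;
}
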
void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const JitPatchInfo& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file,
- dex::StringIndex(info.index)));
+ const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file,
+ dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const JitPatchInfo& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
- dex::TypeIndex(info.index)));
+ const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
+ dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
@@ -2441,6 +2452,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(type)) {
@@ -3443,8 +3457,6 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
Primitive::Type type = instruction->InputAt(0)->GetType();
LocationSummary* locations = instruction->GetLocations();
- Register dst = locations->Out().AsRegister<Register>();
- MipsLabel true_label;
switch (type) {
default:
@@ -3453,27 +3465,14 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
return;
case Primitive::kPrimLong:
- // TODO: don't use branches.
- GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label);
- break;
+ GenerateLongCompare(instruction->GetCondition(), locations);
+ return;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
return;
}
-
- // Convert the branches into the result.
- MipsLabel done;
-
- // False case: result = 0.
- __ LoadConst32(dst, 0);
- __ B(&done);
-
- // True case: result = 1.
- __ Bind(&true_label);
- __ LoadConst32(dst, 1);
- __ Bind(&done);
}
void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -4243,6 +4242,221 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
}
}
+void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond,
+ LocationSummary* locations) {
+ Register dst = locations->Out().AsRegister<Register>();
+ Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ Location rhs_location = locations->InAt(1);
+ Register rhs_high = ZERO;
+ Register rhs_low = ZERO;
+ int64_t imm = 0;
+ uint32_t imm_high = 0;
+ uint32_t imm_low = 0;
+ bool use_imm = rhs_location.IsConstant();
+ if (use_imm) {
+ imm = rhs_location.GetConstant()->AsLongConstant()->GetValue();
+ imm_high = High32Bits(imm);
+ imm_low = Low32Bits(imm);
+ } else {
+ rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+ rhs_low = rhs_location.AsRegisterPairLow<Register>();
+ }
+ if (use_imm && imm == 0) {
+ switch (cond) {
+ case kCondEQ:
+ case kCondBE: // <= 0 if zero
+ __ Or(dst, lhs_high, lhs_low);
+ __ Sltiu(dst, dst, 1);
+ break;
+ case kCondNE:
+ case kCondA: // > 0 if non-zero
+ __ Or(dst, lhs_high, lhs_low);
+ __ Sltu(dst, ZERO, dst);
+ break;
+ case kCondLT:
+ __ Slt(dst, lhs_high, ZERO);
+ break;
+ case kCondGE:
+ __ Slt(dst, lhs_high, ZERO);
+ __ Xori(dst, dst, 1);
+ break;
+ case kCondLE:
+ __ Or(TMP, lhs_high, lhs_low);
+ __ Sra(AT, lhs_high, 31);
+ __ Sltu(dst, AT, TMP);
+ __ Xori(dst, dst, 1);
+ break;
+ case kCondGT:
+ __ Or(TMP, lhs_high, lhs_low);
+ __ Sra(AT, lhs_high, 31);
+ __ Sltu(dst, AT, TMP);
+ break;
+ case kCondB: // always false
+ __ Andi(dst, dst, 0);
+ break;
+ case kCondAE: // always true
+ __ Ori(dst, ZERO, 1);
+ break;
+ }
+ } else if (use_imm) {
+ // TODO: more efficient comparison with constants without loading them into TMP/AT.
+ switch (cond) {
+ case kCondEQ:
+ __ LoadConst32(TMP, imm_high);
+ __ Xor(TMP, TMP, lhs_high);
+ __ LoadConst32(AT, imm_low);
+ __ Xor(AT, AT, lhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltiu(dst, dst, 1);
+ break;
+ case kCondNE:
+ __ LoadConst32(TMP, imm_high);
+ __ Xor(TMP, TMP, lhs_high);
+ __ LoadConst32(AT, imm_low);
+ __ Xor(AT, AT, lhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltu(dst, ZERO, dst);
+ break;
+ case kCondLT:
+ case kCondGE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, lhs_low, TMP);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Slt(AT, lhs_high, TMP);
+ __ Slt(TMP, TMP, lhs_high);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, lhs_low, dst);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondGE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondGT:
+ case kCondLE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, TMP, lhs_low);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Slt(AT, TMP, lhs_high);
+ __ Slt(TMP, lhs_high, TMP);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, dst, lhs_low);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondLE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondB:
+ case kCondAE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, lhs_low, TMP);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Sltu(AT, lhs_high, TMP);
+ __ Sltu(TMP, TMP, lhs_high);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, lhs_low, dst);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondAE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondA:
+ case kCondBE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, TMP, lhs_low);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Sltu(AT, TMP, lhs_high);
+ __ Sltu(TMP, lhs_high, TMP);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, dst, lhs_low);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondBE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ }
+ } else {
+ switch (cond) {
+ case kCondEQ:
+ __ Xor(TMP, lhs_high, rhs_high);
+ __ Xor(AT, lhs_low, rhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltiu(dst, dst, 1);
+ break;
+ case kCondNE:
+ __ Xor(TMP, lhs_high, rhs_high);
+ __ Xor(AT, lhs_low, rhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltu(dst, ZERO, dst);
+ break;
+ case kCondLT:
+ case kCondGE:
+ __ Slt(TMP, rhs_high, lhs_high);
+ __ Sltu(AT, lhs_low, rhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Slt(AT, lhs_high, rhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondGE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondGT:
+ case kCondLE:
+ __ Slt(TMP, lhs_high, rhs_high);
+ __ Sltu(AT, rhs_low, lhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Slt(AT, rhs_high, lhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondLE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondB:
+ case kCondAE:
+ __ Sltu(TMP, rhs_high, lhs_high);
+ __ Sltu(AT, lhs_low, rhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Sltu(AT, lhs_high, rhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondAE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondA:
+ case kCondBE:
+ __ Sltu(TMP, lhs_high, rhs_high);
+ __ Sltu(AT, rhs_low, lhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Sltu(AT, rhs_high, lhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondBE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ }
+ }
+}
+
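The new GenerateLongCompare materializes the boolean result of a 64-bit comparison without branches: the comparison of the high words decides, except when they are equal, where the unsigned low-word comparison takes over; the Slt/Sltu/Or sequence folds that into a single flag. A C++ sketch of the kCondLT composition, with assumed helper names (not ART code):

#include <cassert>
#include <cstdint>

// Branchless "lhs < rhs" for a signed 64-bit value held as 32-bit halves,
// mirroring the Slt/Sltu/Slt/Slt/Or sequence emitted for kCondLT.
static int LessThanI64(int32_t lhs_high, uint32_t lhs_low,
                       int32_t rhs_high, uint32_t rhs_low) {
  int high_gt = (rhs_high < lhs_high) ? 1 : 0;  // Slt  TMP, rhs_high, lhs_high
  int low_lt  = (lhs_low  < rhs_low)  ? 1 : 0;  // Sltu AT,  lhs_low,  rhs_low
  int eq_low  = (high_gt  < low_lt)   ? 1 : 0;  // Slt  TMP, TMP, AT: 1 iff high word not greater and low words say less
  int high_lt = (lhs_high < rhs_high) ? 1 : 0;  // Slt  AT,  lhs_high, rhs_high
  return high_lt | eq_low;                      // Or   dst, AT, TMP
}

int main() {
  assert(LessThanI64(-1, 0xFFFFFFFFu, 0, 0) == 1);  // -1 < 0
  assert(LessThanI64(0, 5, 0, 7) == 1);             // equal high words: low words decide
  assert(LessThanI64(1, 0, 0, 0xFFFFFFFFu) == 0);   // larger high word wins
  return 0;
}
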
void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label) {
@@ -5775,6 +5989,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall));
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (generate_volatile) {
InvokeRuntimeCallingConvention calling_convention;
@@ -6453,6 +6670,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction
void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -6460,6 +6678,7 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -6469,6 +6688,9 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The output does overlap inputs.
@@ -6738,7 +6960,7 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
- bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
+ bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6;
IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -6784,27 +7006,22 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
bool is_r6 = GetInstructionSetFeatures().IsR6();
bool fallback_load = has_irreducible_loops && !is_r6;
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadString::LoadKind::kBootImageAddress:
- break;
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kBootImageAddress:
+ break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
fallback_load = false;
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kRuntimeCall:
fallback_load = false;
break;
}
if (fallback_load) {
- desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+ desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall;
}
return desired_string_load_kind;
}
@@ -6823,27 +7040,22 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
fallback_load = false;
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- break;
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kBootImageAddress:
+ break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
fallback_load = false;
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
fallback_load = false;
break;
}
if (fallback_load) {
- desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall;
}
return desired_class_load_kind;
}
@@ -6885,6 +7097,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO
bool is_r6 = GetInstructionSetFeatures().IsR6();
bool fallback_load = has_irreducible_loops && !is_r6;
switch (dispatch_info.method_load_kind) {
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
break;
default:
@@ -6904,7 +7117,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
bool is_r6 = GetInstructionSetFeatures().IsR6();
- Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
+ Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6)
? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
: ZERO;
@@ -6922,6 +7135,16 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+ bool reordering = __ SetReorder(false);
+ Register temp_reg = temp.AsRegister<Register>();
+ EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg);
+ __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
break;
@@ -7054,28 +7277,28 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
- CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
- cls,
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- calling_convention.GetReturnLocation(Primitive::kPrimNot));
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
return;
}
DCHECK(!cls->NeedsAccessCheck());
-
+ const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kBssEntry:
- if (codegen_->GetInstructionSetFeatures().IsR6()) {
+ if (isR6) {
break;
}
FALLTHROUGH_INTENDED;
@@ -7086,13 +7309,29 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
break;
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ // Request a temp to hold the BSS entry location for the slow path on R2
+ // (no benefit for R6).
+ if (!isR6) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
}
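
The kBssEntry locations above request a temp on R2 and declare a custom caller-save set containing only A0, since the kSaveEverything entrypoints preserve everything else. A toy model of what that declaration buys the register allocator, with hypothetical register numbers rather than ART's RegisterSet API:

#include <bitset>
#include <cassert>

// Toy model of SetCustomSlowPathCallerSaves: by default a slow path is assumed
// to clobber every caller-save register; with a kSaveEverything entrypoint the
// declared clobber set can shrink to just the runtime result register (A0),
// so other values may stay in caller-save registers across the slow path.
using RegSet = std::bitset<32>;

constexpr int kA0 = 4;  // hypothetical register numbers for illustration only
constexpr int kV1 = 3;

static RegSet SlowPathClobbers(const RegSet& all_caller_saves,
                               bool has_custom_set,
                               const RegSet& custom_set) {
  return has_custom_set ? custom_set : all_caller_saves;
}

int main() {
  RegSet caller_saves;
  caller_saves.set(kA0);
  caller_saves.set(kV1);

  RegSet custom;   // empty set plus the entrypoint's return register
  custom.set(kA0);

  // With the custom set, a value cached in V1 survives the slow path.
  assert(!SlowPathClobbers(caller_saves, /*has_custom_set=*/true, custom).test(kV1));
  // Without it, V1 would have to be spilled and reloaded around the call.
  assert(SlowPathClobbers(caller_saves, /*has_custom_set=*/false, custom).test(kV1));
  return 0;
}
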
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -7105,14 +7344,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kBssEntry:
base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
break;
case HLoadClass::LoadKind::kReferrersClass:
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
break;
default:
@@ -7136,14 +7374,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ LoadLiteral(out,
- base_or_current_method_reg,
- codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
- cls->GetTypeIndex()));
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -7168,10 +7398,22 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
- bool reordering = __ SetReorder(false);
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
- __ SetReorder(reordering);
+ constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
+ if (isR6 || non_baker_read_barrier) {
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
+ __ SetReorder(reordering);
+ } else {
+ // On R2 save the BSS entry address in a temporary register instead of
+ // recalculating it in the slow path.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg);
+ __ Addiu(temp, temp, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option);
+ }
generate_null_check = true;
break;
}
@@ -7186,7 +7428,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
__ SetReorder(reordering);
break;
}
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -7235,28 +7477,44 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
+ const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBssEntry:
- if (codegen_->GetInstructionSetFeatures().IsR6()) {
+ if (isR6) {
break;
}
FALLTHROUGH_INTENDED;
// We need an extra register for PC-relative dex cache accesses.
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kRuntimeCall:
locations->SetInAt(0, Location::RequiresRegister());
break;
default:
break;
}
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
- locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+ locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and marking to save everything we need.
+ // Request a temp to hold the BSS entry location for the slow path on R2
+ // (no benefit for R6).
+ if (!isR6) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
}
}
@@ -7271,7 +7529,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBssEntry:
@@ -7283,13 +7540,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
}
switch (load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- __ LoadLiteral(out,
- base_or_current_method_reg,
- codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
- load->GetStringIndex()));
- return; // No dex cache slow path.
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
@@ -7313,14 +7563,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- bool reordering = __ SetReorder(false);
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- GenerateGcRootFieldLoad(load,
- out_loc,
- out,
- /* placeholder */ 0x5678,
- kCompilerReadBarrierOption);
- __ SetReorder(reordering);
+ constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
+ if (isR6 || non_baker_read_barrier) {
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
+ __ SetReorder(reordering);
+ } else {
+ // On R2 save the BSS entry address in a temporary register instead of
+ // recalculating it in the slow path.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg);
+ __ Addiu(temp, temp, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
+ }
SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
codegen_->AddSlowPath(slow_path);
__ Beqz(out, slow_path->GetEntryLabel());
@@ -7348,8 +7610,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
}
// TODO: Re-add the compiler code to do string dex cache lookup again.
- DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+ DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -7774,6 +8037,15 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
}
}
+void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -8093,6 +8365,23 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
+
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+  // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+  // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
if (result_type == Primitive::kPrimLong) {
if (isR6) {
// trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
@@ -8100,62 +8389,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
Register dst_low = locations->Out().AsRegisterPairLow<Register>();
- MipsLabel truncate;
- MipsLabel done;
-
- // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
- // value when the input is either a NaN or is outside of the range of the output type
- // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
- // the same result.
- //
- // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
- // value of the output type if the input is outside of the range after the truncation or
- // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
- // results. This matches the desired float/double-to-int/long conversion exactly.
- //
- // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
- //
- // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
- // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
- // even though it must be NAN2008=1 on R6.
- //
- // The code takes care of the different behaviors by first comparing the input to the
- // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int).
- // If the input is greater than or equal to the minimum, it procedes to the truncate
- // instruction, which will handle such an input the same way irrespective of NAN2008.
- // Otherwise the input is compared to itself to determine whether it is a NaN or not
- // in order to return either zero or the minimum value.
- //
- // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
- // truncate instruction for MIPS64R6.
- if (input_type == Primitive::kPrimFloat) {
- uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
- __ LoadConst32(TMP, min_val);
- __ Mtc1(TMP, FTMP);
- __ CmpLeS(FTMP, FTMP, src);
- } else {
- uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
- __ LoadConst32(TMP, High32Bits(min_val));
- __ Mtc1(ZERO, FTMP);
- __ Mthc1(TMP, FTMP);
- __ CmpLeD(FTMP, FTMP, src);
- }
-
- __ Bc1nez(FTMP, &truncate);
-
- if (input_type == Primitive::kPrimFloat) {
- __ CmpEqS(FTMP, src, src);
- } else {
- __ CmpEqD(FTMP, src, src);
- }
- __ Move(dst_low, ZERO);
- __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
- __ Mfc1(TMP, FTMP);
- __ And(dst_high, dst_high, TMP);
-
- __ B(&done);
-
- __ Bind(&truncate);
if (input_type == Primitive::kPrimFloat) {
__ TruncLS(FTMP, src);
@@ -8164,8 +8397,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
__ Mfc1(dst_low, FTMP);
__ Mfhc1(dst_high, FTMP);
-
- __ Bind(&done);
} else {
QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l
: kQuickD2l;
@@ -8182,43 +8413,19 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
MipsLabel truncate;
MipsLabel done;
- // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
- // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
- // even though it must be NAN2008=1 on R6.
- //
- // For details see the large comment above for the truncation of float/double to long on R6.
- //
- // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
- // truncate instruction for MIPS64R6.
- if (input_type == Primitive::kPrimFloat) {
- uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
- __ LoadConst32(TMP, min_val);
- __ Mtc1(TMP, FTMP);
- } else {
- uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
- __ LoadConst32(TMP, High32Bits(min_val));
- __ Mtc1(ZERO, FTMP);
- __ MoveToFpuHigh(TMP, FTMP);
- }
-
- if (isR6) {
+ if (!isR6) {
if (input_type == Primitive::kPrimFloat) {
- __ CmpLeS(FTMP, FTMP, src);
+ uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
} else {
- __ CmpLeD(FTMP, FTMP, src);
+ uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, High32Bits(min_val));
+ __ Mtc1(ZERO, FTMP);
+ __ MoveToFpuHigh(TMP, FTMP);
}
- __ Bc1nez(FTMP, &truncate);
if (input_type == Primitive::kPrimFloat) {
- __ CmpEqS(FTMP, src, src);
- } else {
- __ CmpEqD(FTMP, src, src);
- }
- __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
- __ Mfc1(TMP, FTMP);
- __ And(dst, dst, TMP);
- } else {
- if (input_type == Primitive::kPrimFloat) {
__ ColeS(0, FTMP, src);
} else {
__ ColeD(0, FTMP, src);
@@ -8232,11 +8439,11 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
__ LoadConst32(dst, std::numeric_limits<int32_t>::min());
__ Movf(dst, ZERO, 0);
- }
- __ B(&done);
+ __ B(&done);
- __ Bind(&truncate);
+ __ Bind(&truncate);
+ }
if (input_type == Primitive::kPrimFloat) {
__ TruncWS(FTMP, src);
@@ -8245,7 +8452,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
__ Mfc1(dst, FTMP);
- __ Bind(&done);
+ if (!isR6) {
+ __ Bind(&done);
+ }
}
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
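
The conversion rework above leans on the NAN2008 behavior spelled out in the comment: the R6 truncate already produces the Java-style result (0 for NaN, saturation on overflow), so only R2 keeps the explicit minimum/NaN checks ahead of the truncate. A portable sketch of the target semantics for float-to-int, not the emitted MIPS sequence:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Desired Java-style float -> int conversion: NaN -> 0, out-of-range values
// saturate to INT32_MIN / INT32_MAX. On R6 (NAN2008=1) trunc.w.s behaves this
// way directly; on R2 the generated code performs checks like these first.
static int32_t FloatToInt32(float value) {
  if (std::isnan(value)) {
    return 0;
  }
  if (value < static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();
  }
  if (value >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();
  }
  return static_cast<int32_t>(value);  // in range: plain truncation toward zero
}

int main() {
  assert(FloatToInt32(std::nanf("")) == 0);
  assert(FloatToInt32(-1e20f) == std::numeric_limits<int32_t>::min());
  assert(FloatToInt32(1e20f) == std::numeric_limits<int32_t>::max());
  assert(FloatToInt32(-3.7f) == -3);
  return 0;
}
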
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 3875c4bdba..736b5070d9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -23,8 +23,8 @@
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "string_reference.h"
+#include "type_reference.h"
#include "utils/mips/assembler_mips.h"
-#include "utils/type_reference.h"
namespace art {
namespace mips {
@@ -229,9 +229,10 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
// We switch to the table-based method starting with 7 cases.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
- void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
@@ -294,6 +295,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateIntCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label);
+ void GenerateLongCompare(IfCondition cond, LocationSummary* locations);
void GenerateLongCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label);
@@ -580,15 +582,13 @@ class CodeGeneratorMIPS : public CodeGenerator {
MipsLabel pc_rel_label;
};
- PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
- dex::StringIndex string_index);
+ PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+ PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
- Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index);
- Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base);
@@ -622,16 +622,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
- using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
- using BootStringToLiteralMap = ArenaSafeMap<StringReference,
- Literal*,
- StringReferenceValueComparator>;
- using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
- Literal*,
- TypeReferenceValueComparator>;
Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
- Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
ArenaDeque<PcRelativePatchInfo>* patches);
@@ -653,16 +645,15 @@ class CodeGeneratorMIPS : public CodeGenerator {
Uint32ToLiteralMap uint32_literals_;
// PC-relative patch info for each HMipsDexCacheArraysBase.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
- BootStringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
- ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
- // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
- BootTypeToLiteralMap boot_image_type_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+
// Patches for string root accesses in JIT compiled code.
ArenaDeque<JitPatchInfo> jit_string_patches_;
// Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 0459a033f8..6026814f04 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -141,7 +141,8 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
- explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {}
+ explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction)
+ : SlowPathCodeMIPS64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -192,7 +193,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = instruction_->GetType();
- mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+ mips64_codegen->MoveLocation(out,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ type);
}
RestoreLiveRegisters(codegen, locations);
@@ -200,10 +203,6 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
DCHECK(out.IsValid());
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the .bss entry address in the fast path,
- // so that we can avoid another calculation here.
- DCHECK_NE(out.AsRegister<GpuRegister>(), AT);
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -250,16 +249,13 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
Primitive::Type type = instruction_->GetType();
mips64_codegen->MoveLocation(locations->Out(),
- calling_convention.GetReturnLocation(type),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
type);
RestoreLiveRegisters(codegen, locations);
// Store the resolved String to the BSS entry.
- // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
- // .bss entry address in the fast path, so that we can avoid another calculation here.
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- DCHECK_NE(out, AT);
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -306,10 +302,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
: SlowPathCodeMIPS64(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD.
mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD.
if (successor_ == nullptr) {
__ Bc(GetReturnLabel());
} else {
@@ -952,20 +951,17 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- assembler_(graph->GetArena()),
+ assembler_(graph->GetArena(), &isa_features),
isa_features_(isa_features),
uint32_literals_(std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
uint64_literals_(std::less<uint64_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_string_patches_(StringReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_type_patches_(TypeReferenceValueComparator(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -1445,50 +1441,36 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- pc_relative_string_patches_.size() +
+ pc_relative_method_patches_.size() +
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
- boot_image_string_patches_.size() +
- boot_image_type_patches_.size();
+ pc_relative_string_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
linker_patches);
- } else {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
linker_patches);
+ } else {
+ DCHECK(pc_relative_method_patches_.empty());
+ DCHECK(pc_relative_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
- for (const auto& entry : boot_image_string_patches_) {
- const StringReference& target_string = entry.first;
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
- linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
- target_string.dex_file,
- target_string.string_index.index_));
- }
- for (const auto& entry : boot_image_type_patches_) {
- const TypeReference& target_type = entry.first;
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
- linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
- target_type.dex_file,
- target_type.type_index.index_));
- }
DCHECK_EQ(size, linker_patches->size());
}
-CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
- const DexFile& dex_file, dex::StringIndex string_index) {
- return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch(
+ MethodReference target_method) {
+ return NewPcRelativePatch(*target_method.dex_file,
+ target_method.dex_method_index,
+ &pc_relative_method_patches_);
}
CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch(
@@ -1501,6 +1483,11 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPa
return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
}
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
+ const DexFile& dex_file, dex::StringIndex string_index) {
+ return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch(
const DexFile& dex_file, uint32_t element_offset) {
return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -1524,27 +1511,6 @@ Literal* CodeGeneratorMIPS64::DeduplicateUint64Literal(uint64_t value) {
[this, value]() { return __ NewLiteral<uint64_t>(value); });
}
-Literal* CodeGeneratorMIPS64::DeduplicateMethodLiteral(MethodReference target_method,
- MethodToLiteralMap* map) {
- return map->GetOrCreate(
- target_method,
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS64::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index) {
- return boot_image_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
-Literal* CodeGeneratorMIPS64::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
- dex::TypeIndex type_index) {
- return boot_image_type_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
-}
-
Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) {
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
@@ -1590,14 +1556,20 @@ void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code,
void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
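The rewritten loop only introduces named locals for readability, but it is the JIT root mechanism in miniature: every string or class literal emitted by the JIT holds a placeholder, and at patch time the placeholder is overwritten with the address of that root's slot in the roots table that accompanies the compiled method. A rough sketch under simplifying assumptions (string keys instead of StringReference/TypeReference, 4-byte root slots, and a table address that fits the 32-bit placeholder):

    #include <cstdint>
    #include <cstring>
    #include <map>
    #include <string>
    #include <vector>

    // One patch = one placeholder in the generated code, keyed by the root it needs.
    struct JitRootPatch {
      std::string key;       // stands in for StringReference / TypeReference
      uint32_t code_offset;  // where the 32-bit placeholder lives in `code`
    };

    // Overwrite each placeholder with the address of the root's slot in the roots
    // table, the way PatchJitRootUse() does per literal.
    void PatchJitRoots(uint8_t* code,
                       const uint8_t* roots_data,
                       const std::vector<JitRootPatch>& patches,
                       const std::map<std::string, uint64_t>& root_indices) {
      for (const JitRootPatch& patch : patches) {
        auto it = root_indices.find(patch.key);
        if (it == root_indices.end()) continue;  // a DCHECK in the real code
        uint64_t index_in_table = it->second;
        uint32_t slot_address = static_cast<uint32_t>(
            reinterpret_cast<uintptr_t>(roots_data) + index_in_table * 4u);
        std::memcpy(code + patch.code_offset, &slot_address, sizeof(slot_address));
      }
    }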
@@ -1645,13 +1617,19 @@ size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
- return kMips64DoublewordSize;
+ __ StoreFpuToOffset(GetGraph()->HasSIMD() ? kStoreQuadword : kStoreDoubleword,
+ FpuRegister(reg_id),
+ SP,
+ stack_index);
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
- return kMips64DoublewordSize;
+ __ LoadFpuFromOffset(GetGraph()->HasSIMD() ? kLoadQuadword : kLoadDoubleword,
+ FpuRegister(reg_id),
+ SP,
+ stack_index);
+ return GetFloatingPointSpillSlotSize();
}
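SaveFloatingPointRegister/RestoreFloatingPointRegister now pick the access width from whether the graph contains SIMD code: MSA vector registers overlay the FPU registers, so a live vector must be spilled as a full 128-bit quadword, while scalar-only methods keep the cheaper 64-bit doubleword spill. A reduced model of that decision (HasSIMD() and the access kinds are stand-ins for the real codegen hooks):

    #include <cstddef>

    enum class FpuAccessKind { kDoubleword, kQuadword };

    struct FpSpillPolicy {
      bool graph_has_simd;

      // Matches the StoreFpuToOffset/LoadFpuFromOffset kind chosen in the diff.
      FpuAccessKind AccessKind() const {
        return graph_has_simd ? FpuAccessKind::kQuadword : FpuAccessKind::kDoubleword;
      }

      // Matches GetFloatingPointSpillSlotSize(): 16 bytes per slot when vector
      // code may be live, 8 bytes for scalar-only methods.
      std::size_t SlotSize() const { return graph_has_simd ? 16u : 8u; }
    };

    // The save/restore helpers return the slot size so the caller advances the
    // stack index by the width that was actually written.
    std::size_t SaveFpuRegister(const FpSpillPolicy& policy, std::size_t /*stack_index*/) {
      return policy.SlotSize();  // a real implementation would emit the store here
    }

GetFloatingPointSpillSlotSize() in the header hunk further down returns the matching 16- versus 8-byte slot size, so the frame layout stays consistent with the chosen access width.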
void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1991,6 +1969,9 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(type)) {
@@ -3990,6 +3971,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
object_field_get_with_read_barrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -4552,6 +4536,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instructi
void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -4559,6 +4544,7 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -4568,6 +4554,9 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The output does overlap inputs.
@@ -4876,25 +4865,19 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
bool fallback_load = false;
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadString::LoadKind::kBootImageAddress:
- break;
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
- break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kRuntimeCall:
+ break;
}
if (fallback_load) {
- desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+ desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall;
}
return desired_string_load_kind;
}
@@ -4908,25 +4891,19 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- break;
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kRuntimeCall:
break;
}
if (fallback_load) {
- desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall;
}
return desired_class_load_kind;
}
@@ -4958,6 +4935,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ CodeGeneratorMIPS64::PcRelativePatchInfo* info =
+ NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+ EmitPcRelativeAddressPlaceholderHigh(info, AT);
+ __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadLiteral(temp.AsRegister<GpuRegister>(),
kLoadDoubleword,
@@ -5083,12 +5068,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke)
void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
- CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
- cls,
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- calling_convention.GetReturnLocation(Primitive::kPrimNot));
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -5098,17 +5081,31 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -5119,7 +5116,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
GpuRegister out = out_loc.AsRegister<GpuRegister>();
GpuRegister current_method_reg = ZERO;
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
- load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ load_kind == HLoadClass::LoadKind::kRuntimeCall) {
current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
}
@@ -5138,14 +5135,6 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
ArtMethod::DeclaringClassOffset().Int32Value(),
read_barrier_option);
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ LoadLiteral(out,
- kLoadUnsignedWord,
- codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
- cls->GetTypeIndex()));
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -5181,7 +5170,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
cls->GetClass()));
GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -5230,11 +5219,22 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
HLoadString::LoadKind load_kind = load->GetLoadKind();
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
- locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+ locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and marking to save everything we need.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
}
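For kBssEntry loads of classes and strings, the slow path calls the type resolution/initialization or pResolveString entrypoint, and with a Baker read barrier (or no read barrier) those paths plus the marking step preserve everything except the argument register, so the location summaries above narrow the slow-path caller-save set to that single register instead of spilling the full caller-save set around the load. A hedged sketch of how such a narrowed set might be expressed (the RegisterSet class and register number below are simplified assumptions, not ART's types):

    #include <bitset>
    #include <cstdint>

    // Simplified register set: one bit per core register.
    class RegisterSet {
     public:
      static RegisterSet Empty() { return RegisterSet(); }
      void Add(uint32_t reg) { bits_.set(reg); }
      bool Contains(uint32_t reg) const { return bits_.test(reg); }
     private:
      std::bitset<32> bits_;
    };

    // First argument register of the runtime calling convention; 4 (A0) is an
    // assumption used only for this sketch.
    constexpr uint32_t kFirstArgumentRegister = 4;

    // Returns true and fills `saves` when the narrowed caller-save set applies;
    // otherwise the default (full) caller-save handling is kept, matching the
    // "temp-clobbering call" comment for the non-Baker read barrier case.
    bool NarrowBssEntryCallerSaves(bool use_read_barrier, bool use_baker_read_barrier,
                                   RegisterSet* saves) {
      if (!use_read_barrier || use_baker_read_barrier) {
        *saves = RegisterSet::Empty();
        saves->Add(kFirstArgumentRegister);
        return true;
      }
      return false;
    }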
@@ -5247,13 +5247,6 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
GpuRegister out = out_loc.AsRegister<GpuRegister>();
switch (load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- __ LoadLiteral(out,
- kLoadUnsignedWord,
- codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
- load->GetStringIndex()));
- return; // No dex cache slow path.
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
@@ -5300,8 +5293,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
}
// TODO: Re-add the compiler code to do string dex cache lookup again.
- DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+ DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -5661,6 +5655,15 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
}
}
+void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -5806,7 +5809,11 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet(
void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+  // In the suspend check slow path there are usually no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
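The suspend-check change applies the same reasoning from the other side: the runtime's suspend entrypoint preserves only the lower 64 bits of each FPU register, so when SIMD code may be live the compiled code itself must treat all FPU registers as caller-save and spill them in full width. A small self-contained illustration of why a lower-half-only save is insufficient:

    #include <cassert>
    #include <cstdint>

    // A 128-bit vector register modelled as two 64-bit halves.
    struct VecReg {
      uint64_t lo;
      uint64_t hi;
    };

    int main() {
      VecReg v = {0x1111222233334444ull, 0x5555666677778888ull};

      // What the runtime stub preserves across the suspend: only the lower half.
      uint64_t runtime_saved_lo = v.lo;
      v = {0, 0};               // register clobbered while the thread is suspended
      v.lo = runtime_saved_lo;  // runtime restore

      assert(v.lo == 0x1111222233334444ull);
      assert(v.hi == 0);  // upper half lost, so compiled code must spill the quadword
      return 0;
    }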
void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5933,68 +5940,6 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
- Mips64Label truncate;
- Mips64Label done;
-
- // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
- // value when the input is either a NaN or is outside of the range of the output type
- // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
- // the same result.
- //
- // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
- // value of the output type if the input is outside of the range after the truncation or
- // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
- // results. This matches the desired float/double-to-int/long conversion exactly.
- //
- // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
- //
- // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
- // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
- // even though it must be NAN2008=1 on R6.
- //
- // The code takes care of the different behaviors by first comparing the input to the
- // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int).
-  // If the input is greater than or equal to the minimum, it proceeds to the truncate
- // instruction, which will handle such an input the same way irrespective of NAN2008.
- // Otherwise the input is compared to itself to determine whether it is a NaN or not
- // in order to return either zero or the minimum value.
- //
- // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
- // truncate instruction for MIPS64R6.
- if (input_type == Primitive::kPrimFloat) {
- uint32_t min_val = (result_type == Primitive::kPrimLong)
- ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
- : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
- __ LoadConst32(TMP, min_val);
- __ Mtc1(TMP, FTMP);
- __ CmpLeS(FTMP, FTMP, src);
- } else {
- uint64_t min_val = (result_type == Primitive::kPrimLong)
- ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
- : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
- __ LoadConst64(TMP, min_val);
- __ Dmtc1(TMP, FTMP);
- __ CmpLeD(FTMP, FTMP, src);
- }
-
- __ Bc1nez(FTMP, &truncate);
-
- if (input_type == Primitive::kPrimFloat) {
- __ CmpEqS(FTMP, src, src);
- } else {
- __ CmpEqD(FTMP, src, src);
- }
- if (result_type == Primitive::kPrimLong) {
- __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
- } else {
- __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
- }
- __ Mfc1(TMP, FTMP);
- __ And(dst, dst, TMP);
-
- __ Bc(&done);
-
- __ Bind(&truncate);
if (result_type == Primitive::kPrimLong) {
if (input_type == Primitive::kPrimFloat) {
@@ -6011,8 +5956,6 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
}
__ Mfc1(dst, FTMP);
}
-
- __ Bind(&done);
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index fd1a174608..9c6b6f62cb 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -21,8 +21,8 @@
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
+#include "type_reference.h"
#include "utils/mips64/assembler_mips64.h"
-#include "utils/type_reference.h"
namespace art {
namespace mips64 {
@@ -226,9 +226,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
// We switch to the table-based method starting with 7 cases.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
- void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
@@ -313,6 +314,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
uint32_t num_entries,
HBasicBlock* switch_block,
HBasicBlock* default_block);
+ int32_t VecAddress(LocationSummary* locations,
+ size_t size,
+ /* out */ GpuRegister* adjusted_base);
Mips64Assembler* const assembler_;
CodeGeneratorMIPS64* const codegen_;
@@ -335,7 +339,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ return GetGraph()->HasSIMD()
+ ? 2 * kMips64DoublewordSize // 16 bytes for each spill.
+ : 1 * kMips64DoublewordSize; // 8 bytes for each spill.
+ }
uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
return assembler_.GetLabelLocation(GetLabelOf(block));
@@ -540,17 +548,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
Mips64Label pc_rel_label;
};
- PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
- dex::StringIndex string_index);
+ PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+ PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file,
uint32_t method_index);
- Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- dex::StringIndex string_index);
- Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
Literal* DeduplicateBootImageAddressLiteral(uint64_t address);
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out);
@@ -569,23 +575,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
private:
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>;
- using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
Literal*,
StringReferenceValueComparator>;
using TypeToLiteralMap = ArenaSafeMap<TypeReference,
Literal*,
TypeReferenceValueComparator>;
- using BootStringToLiteralMap = ArenaSafeMap<StringReference,
- Literal*,
- StringReferenceValueComparator>;
- using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
- Literal*,
- TypeReferenceValueComparator>;
Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
Literal* DeduplicateUint64Literal(uint64_t value);
- Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
@@ -611,16 +609,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
Uint64ToLiteralMap uint64_literals_;
// PC-relative patch info.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
- BootStringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
- ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
- // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
- BootTypeToLiteralMap boot_image_type_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+
// Patches for string root accesses in JIT compiled code.
StringToLiteralMap jit_string_patches_;
// Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 93befa439c..a41adca02c 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces)
namespace art {
namespace arm64 {
+using helpers::DRegisterFrom;
using helpers::VRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
@@ -467,7 +468,50 @@ void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
}
void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ } else {
+ __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ }
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
@@ -475,7 +519,50 @@ void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
}
void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ } else {
+ __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ }
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
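The new ARM64 VecMin/VecMax lowerings select Umin/Smin and Umax/Smax (or Fmin/Fmax) by packed type, with the node's IsUnsigned() flag choosing between the signed and unsigned integer forms and the float/double lanes always taking the IEEE forms. Per lane this is just a width- and signedness-aware min/max; a scalar reference for the 16-bit case, purely illustrative:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Lane-wise minimum over 16-bit lanes, mirroring Umin.V8H versus Smin.V8H.
    std::vector<uint16_t> VecMin16(const std::vector<uint16_t>& lhs,
                                   const std::vector<uint16_t>& rhs,
                                   bool is_unsigned) {
      std::vector<uint16_t> dst(lhs.size());
      for (std::size_t i = 0; i < lhs.size(); ++i) {
        if (is_unsigned) {
          dst[i] = std::min(lhs[i], rhs[i]);  // Umin: compare the raw lane bits
        } else {
          dst[i] = static_cast<uint16_t>(
              std::min(static_cast<int16_t>(lhs[i]), static_cast<int16_t>(rhs[i])));  // Smin
        }
      }
      return dst;
    }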
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
@@ -771,20 +858,28 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
}
}
-// Helper to set up registers and address for vector memory operations.
-MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+// Helper to construct the memory operand for a vector memory operation. Returns the operand
+// and, if a temporary register was needed to form it, sets the output parameter scratch to
+// that register so the caller can release it right after using the operand.
+MemOperand InstructionCodeGeneratorARM64::VecAddress(
HVecMemoryOperation* instruction,
- Location* reg_loc,
- bool is_load,
- UseScratchRegisterScope* temps_scope) {
+ UseScratchRegisterScope* temps_scope,
+ size_t size,
+ bool is_string_char_at,
+ /*out*/ Register* scratch) {
LocationSummary* locations = instruction->GetLocations();
Register base = InputRegisterAt(instruction, 0);
- Location index = locations->InAt(1);
- *reg_loc = is_load ? locations->Out() : locations->InAt(2);
- Primitive::Type packed_type = instruction->GetPackedType();
- uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
- size_t shift = Primitive::ComponentSizeShift(packed_type);
+ if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+ DCHECK(!is_string_char_at);
+ return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+ }
+
+ Location index = locations->InAt(1);
+ uint32_t offset = is_string_char_at
+ ? mirror::String::ValueOffset().Uint32Value()
+ : mirror::Array::DataOffset(size).Uint32Value();
+ size_t shift = ComponentSizeShiftWidth(size);
// HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
@@ -793,10 +888,9 @@ MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
offset += Int64ConstantFrom(index) << shift;
return HeapOperand(base, offset);
} else {
- Register temp = temps_scope->AcquireSameSizeAs(base);
- __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift));
-
- return HeapOperand(temp, offset);
+ *scratch = temps_scope->AcquireSameSizeAs(base);
+ __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+ return HeapOperand(*scratch, offset);
}
}
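VecAddress replaces CreateVecMemRegisters: it takes the element size and a String.charAt flag, and reports any scratch register it acquired so the caller can release it between the two loads of the compressed-string path. The address arithmetic reduces to "data offset plus index shifted by the element-size shift", folded into the immediate for a constant index or added into a scratch base for a register index. A simplified model with plain integers standing in for registers and MemOperand:

    #include <cstddef>
    #include <cstdint>

    struct Arm64VecAddress {
      uint64_t base;      // register value, or the sum placed in the scratch register
      uint32_t offset;    // immediate folded into the HeapOperand
      bool used_scratch;  // tells the caller to release the scratch register
    };

    // `data_offset` plays the role of mirror::Array::DataOffset(size) or, for the
    // String.charAt path, mirror::String::ValueOffset() (both assumptions here).
    Arm64VecAddress ComputeVecAddress(uint64_t base,
                                      bool index_is_constant,
                                      int64_t index,
                                      std::size_t element_size,
                                      uint32_t data_offset) {
      std::size_t shift = 0;
      while ((std::size_t{1} << shift) < element_size) ++shift;  // ComponentSizeShiftWidth
      if (index_is_constant) {
        // Constant index: fold everything into the immediate offset.
        return {base, static_cast<uint32_t>(data_offset + (index << shift)), false};
      }
      // Register index: scratch = base + (index << shift); offset stays data_offset.
      return {base + (static_cast<uint64_t>(index) << shift), data_offset, true};
    }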
@@ -805,15 +899,43 @@ void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
}
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
- Location reg_loc = Location::NoLocation();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ VRegister reg = VRegisterFrom(locations->Out());
UseScratchRegisterScope temps(GetVIXLAssembler());
- MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true, &temps);
- VRegister reg = VRegisterFrom(reg_loc);
+ Register scratch;
switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ // Special handling of compressed/uncompressed string load.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ vixl::aarch64::Label uncompressed_load, done;
+ // Test compression bit.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ Register length = temps.AcquireW();
+ __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
+ __ Tbnz(length.W(), 0, &uncompressed_load);
+ temps.Release(length); // no longer needed
+ // Zero extend 8 compressed bytes into 8 chars.
+ __ Ldr(DRegisterFrom(locations->Out()).V8B(),
+ VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+ __ Uxtl(reg.V8H(), reg.V8B());
+ __ B(&done);
+ if (scratch.IsValid()) {
+ temps.Release(scratch); // if used, no longer needed
+ }
+ // Load 8 direct uncompressed chars.
+ __ Bind(&uncompressed_load);
+ __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+ __ Bind(&done);
+ return;
+ }
+ FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimFloat:
@@ -821,7 +943,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
case Primitive::kPrimDouble:
DCHECK_LE(2u, instruction->GetVectorLength());
DCHECK_LE(instruction->GetVectorLength(), 16u);
- __ Ldr(reg, mem);
+ __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -834,10 +956,11 @@ void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
}
void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
- Location reg_loc = Location::NoLocation();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ VRegister reg = VRegisterFrom(locations->InAt(2));
UseScratchRegisterScope temps(GetVIXLAssembler());
- MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false, &temps);
- VRegister reg = VRegisterFrom(reg_loc);
+ Register scratch;
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
@@ -850,7 +973,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
case Primitive::kPrimDouble:
DCHECK_LE(2u, instruction->GetVectorLength());
DCHECK_LE(instruction->GetVectorLength(), 16u);
- __ Str(reg, mem);
+ __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
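The kPrimChar case added to VisitVecLoad above covers vectorized String.charAt with string compression: bit 0 of the count field is the compression flag (0 = compressed, per the static_assert), so the code tests that bit, loads 8 compressed bytes and zero-extends them to 8 chars, or loads 16 bytes of uncompressed chars directly. A scalar sketch of the same decision, with the object layout reduced to a raw byte pointer and a count word:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Load 8 consecutive chars starting at `index`, honouring string compression.
    // `count_field` is the String's count word; bit 0 is the compression flag
    // (0 = compressed, matching the static_assert on StringCompressionFlag above).
    void Load8Chars(const uint8_t* value_data, uint32_t count_field,
                    std::size_t index, uint16_t out[8]) {
      bool compressed = (count_field & 1u) == 0u;
      if (compressed) {
        // 8 compressed bytes, zero-extended to 8 chars (the 8B load + Uxtl pair).
        for (int i = 0; i < 8; ++i) {
          out[i] = static_cast<uint16_t>(value_data[index + i]);
        }
      } else {
        // 8 uncompressed chars loaded directly (the full-width Ldr).
        std::memcpy(out, value_data + index * sizeof(uint16_t), 8 * sizeof(uint16_t));
      }
    }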
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 50b95c17cb..af9e89e791 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -15,6 +15,7 @@
*/
#include "code_generator_mips64.h"
+#include "mirror/array-inl.h"
namespace art {
namespace mips64 {
@@ -22,12 +23,72 @@ namespace mips64 {
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
+VectorRegister VectorRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister());
+ return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>());
+}
+
void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>());
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ ReplicateFPToVectorRegister(dst,
+ locations->InAt(0).AsFpuRegister<FpuRegister>(),
+ /* is_double */ false);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ ReplicateFPToVectorRegister(dst,
+ locations->InAt(0).AsFpuRegister<FpuRegister>(),
+ /* is_double */ true);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
@@ -51,13 +112,23 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
LocationSummary* locations = new (arena) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(),
+ instruction->IsVecNot() ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ break;
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- DCHECK(locations);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(),
+ (instruction->IsVecNeg() || instruction->IsVecAbs())
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -70,7 +141,18 @@ void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ffint_sW(dst, src);
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
@@ -78,7 +160,45 @@ void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ FillB(dst, ZERO);
+ __ SubvB(dst, dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ FillH(dst, ZERO);
+ __ SubvH(dst, dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(dst, ZERO);
+ __ SubvW(dst, dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillD(dst, ZERO);
+ __ SubvD(dst, dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(dst, ZERO);
+ __ FsubW(dst, dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillD(dst, ZERO);
+ __ FsubD(dst, dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
@@ -86,7 +206,47 @@ void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ FillB(dst, ZERO); // all zeroes
+ __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src)
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ FillH(dst, ZERO); // all zeroes
+ __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src)
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(dst, ZERO); // all zeroes
+ __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src)
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillD(dst, ZERO); // all zeroes
+ __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src)
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ LdiW(dst, -1); // all ones
+ __ SrliW(dst, dst, 1);
+ __ AndV(dst, dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ LdiD(dst, -1); // all ones
+ __ SrliD(dst, dst, 1);
+ __ AndV(dst, dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
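VisitVecAbs leans on two MSA idioms: integer lanes use add_a against a zero vector, since |0| + |src| = |src|, and float/double lanes clear the sign bit by building an all-ones vector, logically shifting each lane right by one (leaving 0x7FFF... per lane), and ANDing it with the source. The floating-point half of that trick, written out in scalar form:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Clear the sign bit of a float the way LdiW(-1); SrliW(dst, dst, 1); AndV does
    // for every 32-bit lane.
    float AbsViaSignMask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      uint32_t mask = ~0u >> 1;  // 0x7FFFFFFF: all ones, logically shifted right by one
      bits &= mask;              // drop the sign bit, keep exponent and mantissa
      float result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }

    int main() {
      assert(AbsViaSignMask(-2.5f) == 2.5f);
      assert(AbsViaSignMask(3.0f) == 3.0f);
      return 0;
    }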
void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
@@ -94,7 +254,30 @@ void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: // special case boolean-not
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ LdiB(dst, 1);
+ __ XorV(dst, dst, src);
+ break;
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ NorV(dst, src, src); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector binary operations.
@@ -106,9 +289,12 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation*
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- DCHECK(locations);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -121,7 +307,40 @@ void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ AddvB(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ AddvH(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ AddvW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ AddvD(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FaddW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FaddD(dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
@@ -129,7 +348,40 @@ void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Aver_uB(dst, lhs, rhs)
+ : __ Ave_uB(dst, lhs, rhs);
+ } else {
+ instruction->IsRounded()
+ ? __ Aver_sB(dst, lhs, rhs)
+ : __ Ave_sB(dst, lhs, rhs);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Aver_uH(dst, lhs, rhs)
+ : __ Ave_uH(dst, lhs, rhs);
+ } else {
+ instruction->IsRounded()
+ ? __ Aver_sH(dst, lhs, rhs)
+ : __ Ave_sH(dst, lhs, rhs);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
@@ -137,7 +389,40 @@ void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ SubvB(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ SubvH(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ SubvW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ SubvD(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FsubW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FsubD(dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
@@ -145,7 +430,40 @@ void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ MulvB(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ MulvH(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ MulvW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ MulvD(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FmulW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FmulD(dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
@@ -153,7 +471,23 @@ void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FdivW(dst, lhs, rhs);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FdivD(dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
@@ -177,7 +511,27 @@ void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ AndV(dst, lhs, rhs); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
@@ -193,7 +547,27 @@ void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ OrV(dst, lhs, rhs); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
@@ -201,7 +575,27 @@ void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ XorV(dst, lhs, rhs); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector shift operations.
@@ -213,7 +607,9 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation*
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- DCHECK(locations);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -226,7 +622,32 @@ void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ SlliB(dst, lhs, value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ SlliH(dst, lhs, value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ SlliW(dst, lhs, value);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ SlliD(dst, lhs, value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
@@ -234,7 +655,32 @@ void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ SraiB(dst, lhs, value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ SraiH(dst, lhs, value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ SraiW(dst, lhs, value);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ SraiD(dst, lhs, value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
@@ -242,7 +688,32 @@ void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ SrliB(dst, lhs, value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ SrliH(dst, lhs, value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ SrliW(dst, lhs, value);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ SrliD(dst, lhs, value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
@@ -253,20 +724,143 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to prepare register and offset for vector memory operations. Returns the offset and sets
+// the output parameter adjusted_base to the original base or to a reserved temporary register (AT).
+int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations,
+ size_t size,
+ /* out */ GpuRegister* adjusted_base) {
+ GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>();
+ Location index = locations->InAt(1);
+ int scale = TIMES_1;
+ switch (size) {
+ case 2: scale = TIMES_2; break;
+ case 4: scale = TIMES_4; break;
+ case 8: scale = TIMES_8; break;
+ default: break;
+ }
+ int32_t offset = mirror::Array::DataOffset(size).Int32Value();
+
+ if (index.IsConstant()) {
+ offset += index.GetConstant()->AsIntConstant()->GetValue() << scale;
+ __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale);
+ *adjusted_base = base;
+ } else {
+ GpuRegister index_reg = index.AsRegister<GpuRegister>();
+ if (scale != TIMES_1) {
+ __ Dlsa(AT, index_reg, base, scale);
+ } else {
+ __ Daddu(AT, base, index_reg);
+ }
+ *adjusted_base = AT;
+ }
+ return offset;
+}
+
void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true);
}
void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ VectorRegister reg = VectorRegisterFrom(locations->Out());
+ GpuRegister base;
+ int32_t offset = VecAddress(locations, size, &base);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ LdB(reg, base, offset);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ // Loading 8 bytes (needed when dealing with compressed strings in StringCharAt) from an
+ // unaligned memory address may trap to the kernel if the CPU doesn't directly support
+ // unaligned loads and stores.
+ // TODO: Implement support for StringCharAt.
+ DCHECK(!instruction->IsStringCharAt());
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ LdH(reg, base, offset);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ LdW(reg, base, offset);
+ break;
+ case Primitive::kPrimLong:
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ LdD(reg, base, offset);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false);
}
void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ VectorRegister reg = VectorRegisterFrom(locations->InAt(2));
+ GpuRegister base;
+ int32_t offset = VecAddress(locations, size, &base);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ StB(reg, base, offset);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ StH(reg, base, offset);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ StW(reg, base, offset);
+ break;
+ case Primitive::kPrimLong:
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ StD(reg, base, offset);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
#undef __
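
For reference, the address computed by the VecAddress helper above is just base + array-data-offset + index * element-size, with the scaled index folded into the displacement when the index is a constant; a scalar sketch under the assumption of a hypothetical 16-byte data offset (the real value comes from mirror::Array::DataOffset):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical stand-in for mirror::Array::DataOffset(size); illustration only.
    constexpr int32_t kAssumedArrayDataOffset = 16;

    // Effective address of element `index` in an array of `element_size`-byte elements,
    // mirroring the constant-index path (offset folded) and the register-index path (Dlsa).
    uintptr_t VecElementAddress(uintptr_t base, size_t index, size_t element_size) {
      return base + kAssumedArrayDataOffset + index * element_size;
    }
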
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 013b092b5a..14782d70a1 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -201,6 +201,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Integral-abs requires a temporary for the comparison.
if (instruction->GetPackedType() == Primitive::kPrimInt) {
instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}
@@ -482,7 +483,51 @@ void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
}
void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminub(dst, src);
+ } else {
+ __ pminsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminuw(dst, src);
+ } else {
+ __ pminsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminud(dst, src);
+ } else {
+ __ pminsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
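
The "sloppy wrt 0.0 vs -0.0" comments above refer to packed float min/max behaving like a compare-and-select, which cannot tell the two zeros apart (Java's Math.min/max can); a small scalar C++ illustration of the difference, not part of the patch:

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = -0.0, b = 0.0;
      // Compare-and-select min, roughly what minps/minpd compute per lane:
      double sloppy = (a < b) ? a : b;   // -0.0 < 0.0 is false, so +0.0 is selected
      // Java's Math.min(-0.0, 0.0) is specified to return -0.0, i.e. it inspects the sign bit.
      std::printf("sign bit of sloppy min: %d\n", std::signbit(sloppy));  // 0
      std::printf("sign bit of -0.0:       %d\n", std::signbit(a));       // 1
      return 0;
    }
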
void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
@@ -490,7 +535,51 @@ void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
}
void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxub(dst, src);
+ } else {
+ __ pmaxsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxuw(dst, src);
+ } else {
+ __ pmaxsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxud(dst, src);
+ } else {
+ __ pmaxsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
@@ -766,16 +855,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
}
}
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
- Location* reg_loc,
- bool is_load) {
- LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
Location base = locations->InAt(0);
Location index = locations->InAt(1);
- *reg_loc = is_load ? locations->Out() : locations->InAt(2);
- size_t size = Primitive::ComponentSize(instruction->GetPackedType());
- uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
ScaleFactor scale = TIMES_1;
switch (size) {
case 2: scale = TIMES_2; break;
@@ -783,22 +866,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
case 8: scale = TIMES_8; break;
default: break;
}
+ uint32_t offset = is_string_char_at
+ ? mirror::String::ValueOffset().Uint32Value()
+ : mirror::Array::DataOffset(size).Uint32Value();
return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
}
void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+ // String load requires a temporary for the compressed load.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
}
void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
- Location reg_loc = Location::NoLocation();
- Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
- XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ // Special handling of compressed/uncompressed string load.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ NearLabel done, not_compressed;
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // Test compression bit.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
+ __ j(kNotZero, &not_compressed);
+ // Zero extend 8 compressed bytes into 8 chars.
+ __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ pxor(tmp, tmp);
+ __ punpcklbw(reg, tmp);
+ __ jmp(&done);
+ // Load 8 direct uncompressed chars.
+ __ Bind(&not_compressed);
+ is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+ __ Bind(&done);
+ return;
+ }
+ FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
@@ -825,9 +939,10 @@ void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
}
void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
- Location reg_loc = Location::NoLocation();
- Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
- XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+ XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
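
The compressed-string path above widens 8 packed Latin-1 bytes into 8 UTF-16 code units by interleaving with a zeroed register (pxor + punpcklbw); a scalar C++ sketch of that widening, for illustration only:

    #include <cstdint>

    // Zero-extend 8 compressed (Latin-1) string bytes into 8 UTF-16 code units --
    // element-wise what punpcklbw against a zeroed register achieves in one instruction.
    void WidenCompressedChars(const uint8_t* compressed, uint16_t* out) {
      for (int i = 0; i < 8; ++i) {
        out[i] = static_cast<uint16_t>(compressed[i]);  // zero-extension, no sign involved
      }
    }
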
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 66f19a4376..246044ebb8 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -194,6 +194,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Integral-abs requires a temporary for the comparison.
if (instruction->GetPackedType() == Primitive::kPrimInt) {
instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}
@@ -352,6 +353,10 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct
DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
@@ -471,7 +476,51 @@ void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminub(dst, src);
+ } else {
+ __ pminsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminuw(dst, src);
+ } else {
+ __ pminsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminud(dst, src);
+ } else {
+ __ pminsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
@@ -479,7 +528,51 @@ void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxub(dst, src);
+ } else {
+ __ pmaxsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxuw(dst, src);
+ } else {
+ __ pmaxsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxud(dst, src);
+ } else {
+ __ pmaxsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
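
The IsUnsigned() split above matters because the same bit pattern orders differently under signed and unsigned comparison; a standalone SSE sketch (assumes SSE4.1 for pminsb/_mm_min_epi8, compile with -msse4.1), illustration only:

    #include <immintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // 0x80 is 128 when viewed as unsigned, but -128 when viewed as signed.
      __m128i a = _mm_set1_epi8(static_cast<char>(0x80));
      __m128i b = _mm_set1_epi8(0x01);
      __m128i umin = _mm_min_epu8(a, b);  // pminub: 1 < 128, keeps 0x01
      __m128i smin = _mm_min_epi8(a, b);  // pminsb: -128 < 1, keeps 0x80
      std::printf("unsigned min lane 0: 0x%02x\n",
                  static_cast<unsigned>(static_cast<uint8_t>(_mm_extract_epi8(umin, 0))));
      std::printf("signed   min lane 0: 0x%02x\n",
                  static_cast<unsigned>(static_cast<uint8_t>(_mm_extract_epi8(smin, 0))));
      return 0;
    }
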
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
@@ -755,16 +848,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
}
}
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
- Location* reg_loc,
- bool is_load) {
- LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
Location base = locations->InAt(0);
Location index = locations->InAt(1);
- *reg_loc = is_load ? locations->Out() : locations->InAt(2);
- size_t size = Primitive::ComponentSize(instruction->GetPackedType());
- uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
ScaleFactor scale = TIMES_1;
switch (size) {
case 2: scale = TIMES_2; break;
@@ -772,22 +859,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
case 8: scale = TIMES_8; break;
default: break;
}
+ uint32_t offset = is_string_char_at
+ ? mirror::String::ValueOffset().Uint32Value()
+ : mirror::Array::DataOffset(size).Uint32Value();
return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
}
void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+ // String load requires a temporary for the compressed load.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
}
void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
- Location reg_loc = Location::NoLocation();
- Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
- XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ // Special handling of compressed/uncompressed string load.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ NearLabel done, not_compressed;
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // Test compression bit.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
+ __ j(kNotZero, &not_compressed);
+ // Zero extend 8 compressed bytes into 8 chars.
+ __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ pxor(tmp, tmp);
+ __ punpcklbw(reg, tmp);
+ __ jmp(&done);
+ // Load 8 direct uncompressed chars.
+ __ Bind(&not_compressed);
+ is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+ __ Bind(&done);
+ return;
+ }
+ FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
@@ -814,9 +932,10 @@ void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
- Location reg_loc = Location::NoLocation();
- Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
- XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+ XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7e640c284f..b8465cd9d5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,6 +26,7 @@
#include "intrinsics_x86.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
+#include "lock_word.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -1032,9 +1033,10 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
assembler_(graph->GetArena()),
isa_features_(isa_features),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
constant_area_start_(-1),
@@ -2066,6 +2068,15 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant
// Will be generated at use site.
}
+void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
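
VisitConstructorFence above lowers the fence to a StoreStore barrier: the stores initializing the new object must become visible before the store that publishes its reference. A hedged C++ analogue using a release fence (which is at least as strong as StoreStore; readers would pair it with an acquire):

    #include <atomic>

    struct Point { int x; int y; };
    std::atomic<Point*> g_published{nullptr};

    void Publish() {
      Point* p = new Point{1, 2};                           // initializing stores
      std::atomic_thread_fence(std::memory_order_release);  // plays the StoreStore role here
      g_published.store(p, std::memory_order_relaxed);      // publishing store
    }
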
void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -2158,7 +2169,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
- if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+ if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
}
return;
@@ -2167,7 +2178,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
HandleInvoke(invoke);
// For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
- if (invoke->HasPcRelativeDexCache()) {
+ if (invoke->HasPcRelativeMethodLoadKind()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
}
}
@@ -4510,18 +4521,16 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
// save one load. However, since this is just an intrinsic slow path we prefer this
 // simple and more robust approach rather than trying to determine if that's the case.
SlowPathCode* slow_path = GetCurrentSlowPath();
- if (slow_path != nullptr) {
- if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
- int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
- __ movl(temp, Address(ESP, stack_offset));
- return temp;
- }
+ DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+ int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+ __ movl(temp, Address(ESP, stack_offset));
+ return temp;
}
return location.AsRegister<Register>();
}
-Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -4534,6 +4543,14 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+ temp.AsRegister<Register>());
+ __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
+ RecordBootMethodPatch(invoke);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
break;
@@ -4571,11 +4588,6 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
break;
}
}
- return callee_method;
-}
-
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -4622,27 +4634,18 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
}
-void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
- DCHECK(GetCompilerOptions().IsBootImage());
- HX86ComputeBaseMethodAddress* address = nullptr;
- if (GetCompilerOptions().GetCompilePic()) {
- address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
- } else {
- DCHECK_EQ(load_string->InputCount(), 0u);
- }
- string_patches_.emplace_back(address,
- load_string->GetDexFile(),
- load_string->GetStringIndex().index_);
- __ Bind(&string_patches_.back().label);
+void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ HX86ComputeBaseMethodAddress* address =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+ boot_image_method_patches_.emplace_back(address,
+ *invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
+ __ Bind(&boot_image_method_patches_.back().label);
}
void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
- HX86ComputeBaseMethodAddress* address = nullptr;
- if (GetCompilerOptions().GetCompilePic()) {
- address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
- } else {
- DCHECK_EQ(load_class->InputCount(), 0u);
- }
+ HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
boot_image_type_patches_.emplace_back(address,
load_class->GetDexFile(),
load_class->GetTypeIndex().index_);
@@ -4657,6 +4660,15 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
return &type_bss_entry_patches_.back().label;
}
+void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
+ string_patches_.emplace_back(address,
+ load_string->GetDexFile(),
+ load_string->GetStringIndex().index_);
+ __ Bind(&string_patches_.back().label);
+}
+
Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
DCHECK(!GetCompilerOptions().IsBootImage());
HX86ComputeBaseMethodAddress* address =
@@ -4694,29 +4706,23 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- string_patches_.size() +
+ boot_image_method_patches_.size() +
boot_image_type_patches_.size() +
- type_bss_entry_patches_.size();
+ type_bss_entry_patches_.size() +
+ string_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(boot_image_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
- } else if (GetCompilerOptions().GetCompilePic()) {
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
+ linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
} else {
- for (const PatchInfo<Label>& info : boot_image_type_patches_) {
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index));
- }
- for (const PatchInfo<Label>& info : string_patches_) {
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(
- LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index));
- }
+ DCHECK(boot_image_method_patches_.empty());
+ DCHECK(boot_image_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
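
The new boot_image_method_patches_ deque follows the same record-then-emit shape as the existing type and string patches: bind a Label at the instruction carrying the placeholder, then convert label positions into linker patches once the code is assembled. A stripped-down sketch of that bookkeeping with hypothetical stand-in types (not the real PatchInfo/LinkerPatch API):

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-ins, for illustration only.
    struct Label { uint32_t position; };
    struct PendingPatch { Label label; uint32_t dex_index; };
    struct EmittedPatch { uint32_t literal_offset; uint32_t dex_index; };

    // At code-generation time: remember where the placeholder immediate was emitted.
    void RecordPatch(std::vector<PendingPatch>* pending,
                     uint32_t code_position,
                     uint32_t dex_index) {
      pending->push_back(PendingPatch{Label{code_position}, dex_index});
    }

    // After assembly: turn label positions into literal offsets for the linker to rewrite.
    void EmitPatches(const std::vector<PendingPatch>& pending,
                     uint32_t label_to_literal_adjustment,
                     std::vector<EmittedPatch>* out) {
      for (const PendingPatch& p : pending) {
        out->push_back(EmittedPatch{p.label.position - label_to_literal_adjustment,
                                    p.dex_index});
      }
    }
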
@@ -6045,21 +6051,15 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- FALLTHROUGH_INTENDED;
case HLoadClass::LoadKind::kBssEntry:
- DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT.
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kRuntimeCall:
break;
}
return desired_class_load_kind;
@@ -6067,7 +6067,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
@@ -6121,7 +6121,7 @@ Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
// move.
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -6149,13 +6149,6 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ movl(out, Immediate(/* placeholder */ 0));
- codegen_->RecordBootTypePatch(cls);
- break;
- }
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
@@ -6188,7 +6181,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
break;
}
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kRuntimeCall:
case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -6243,21 +6236,15 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- break;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- FALLTHROUGH_INTENDED;
case HLoadString::LoadKind::kBssEntry:
- DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT.
- break;
- case HLoadString::LoadKind::kBootImageAddress:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kRuntimeCall:
break;
}
return desired_string_load_kind;
@@ -6271,7 +6258,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
locations->SetOut(Location::RegisterLocation(EAX));
} else {
locations->SetOut(Location::RequiresRegister());
@@ -6308,12 +6295,6 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Register out = out_loc.AsRegister<Register>();
switch (load->GetLoadKind()) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- __ movl(out, Immediate(/* placeholder */ 0));
- codegen_->RecordBootStringPatch(load);
- return; // No dex cache slow path.
- }
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
@@ -7694,7 +7675,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
constant_area_start_ = assembler->CodeSize();
// Populate any jump tables.
- for (auto jump_table : fixups_to_jump_tables_) {
+ for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
jump_table->CreateJumpTable();
}
@@ -7833,17 +7814,19 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const PatchInfo<Label>& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(
+ const auto it = jit_string_roots_.find(
StringReference(&info.dex_file, dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const PatchInfo<Label>& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(
+ const auto it = jit_class_roots_.find(
TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
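
The switch from `const auto& it` to `const auto it` above copies the iterator returned by find() instead of binding a const reference to the temporary; both spellings are legal (lifetime extension keeps the reference valid), so this reads as a clarity cleanup. A tiny illustration:

    #include <cassert>
    #include <map>

    int main() {
      std::map<int, int> m{{1, 10}};
      const auto& ref_it = m.find(1);  // reference to a temporary iterator (lifetime-extended)
      const auto  val_it = m.find(1);  // plain copy of the iterator -- the clearer spelling
      assert(ref_it->second == 10 && val_it->second == 10);
      return 0;
    }
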
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ca3a9eadd2..8130bd9d25 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -408,14 +408,14 @@ class CodeGeneratorX86 : public CodeGenerator {
HInvokeStaticOrDirect* invoke) OVERRIDE;
// Generate a call to a static or direct method.
- Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
// Generate a call to a virtual method.
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
- void RecordBootStringPatch(HLoadString* load_string);
+ void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordBootTypePatch(HLoadClass* load_class);
Label* NewTypeBssEntryPatch(HLoadClass* load_class);
+ void RecordBootStringPatch(HLoadString* load_string);
Label* NewStringBssEntryPatch(HLoadString* load_string);
Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address,
const DexFile& dex_file,
@@ -633,16 +633,17 @@ class CodeGeneratorX86 : public CodeGenerator {
// PC-relative DexCache access info.
ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
- ArenaDeque<X86PcRelativePatchInfo> string_patches_;
- // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
+ // PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
// Type patch locations for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
+ // String patch locations; type depends on configuration (app .bss or boot image).
+ ArenaDeque<X86PcRelativePatchInfo> string_patches_;
// Patches for string root accesses in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_string_patches_;
-
// Patches for class root accesses in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_class_patches_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dfb11aaba5..8dde298267 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -23,6 +23,7 @@
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
+#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
@@ -976,9 +977,10 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati
return desired_dispatch_info;
}
-Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp) {
// All registers are assumed to be correctly set up.
+
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -991,6 +993,12 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().IsBootImage());
+ __ leal(temp.AsRegister<CpuRegister>(),
+ Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+ RecordBootMethodPatch(invoke);
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
break;
@@ -1025,13 +1033,6 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
break;
}
}
- return callee_method;
-}
-
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
- // All registers are assumed to be correctly set up.
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -1079,10 +1080,10 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
kX86_64PointerSize).SizeValue()));
}
-void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
- DCHECK(GetCompilerOptions().IsBootImage());
- string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
- __ Bind(&string_patches_.back().label);
+void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
+ boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
+ __ Bind(&boot_image_method_patches_.back().label);
}
void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
@@ -1096,6 +1097,12 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
return &type_bss_entry_patches_.back().label;
}
+void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
+ DCHECK(GetCompilerOptions().IsBootImage());
+ string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
+ __ Bind(&string_patches_.back().label);
+}
+
Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
DCHECK(!GetCompilerOptions().IsBootImage());
string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
@@ -1128,20 +1135,23 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- string_patches_.size() +
+ boot_image_method_patches_.size() +
boot_image_type_patches_.size() +
- type_bss_entry_patches_.size();
+ type_bss_entry_patches_.size() +
+ string_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- if (!GetCompilerOptions().IsBootImage()) {
- DCHECK(boot_image_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
- } else {
- // These are always PC-relative, see GetSupportedLoadClassKind()/GetSupportedLoadStringKind().
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
+ linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
+ } else {
+ DCHECK(boot_image_method_patches_.empty());
+ DCHECK(boot_image_type_patches_.empty());
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
@@ -1232,12 +1242,13 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
isa_features_(isa_features),
constant_area_start_(0),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+ jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -2174,6 +2185,15 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
// Will be generated at use site.
}
+void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -5449,22 +5469,15 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- // We prefer the always-available RIP-relative address for the x86-64 boot image.
- return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- break;
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kRuntimeCall:
break;
}
return desired_class_load_kind;
@@ -5472,7 +5485,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
// Custom calling convention: RAX serves as both input and output.
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
@@ -5523,7 +5536,7 @@ Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
// move.
void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
return;
}
@@ -5626,22 +5639,15 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!GetCompilerOptions().GetCompilePic());
- // We prefer the always-available RIP-relative address for the x86-64 boot image.
- return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(GetCompilerOptions().GetCompilePic());
- break;
- case HLoadString::LoadKind::kBootImageAddress:
- break;
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCacheViaMethod:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kRuntimeCall:
break;
}
return desired_string_load_kind;
@@ -5650,7 +5656,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
- if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+ if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
locations->SetOut(Location::RegisterLocation(RAX));
} else {
locations->SetOut(Location::RequiresRegister());
@@ -7046,7 +7052,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
constant_area_start_ = assembler->CodeSize();
// Populate any jump tables.
- for (auto jump_table : fixups_to_jump_tables_) {
+ for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
jump_table->CreateJumpTable();
}
@@ -7140,17 +7146,19 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const PatchInfo<Label>& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(
+ const auto it = jit_string_roots_.find(
StringReference(&info.dex_file, dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const PatchInfo<Label>& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(
+ const auto it = jit_class_roots_.find(
TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c8336dabd9..25479814d0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -404,13 +404,13 @@ class CodeGeneratorX86_64 : public CodeGenerator {
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
HInvokeStaticOrDirect* invoke) OVERRIDE;
- Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
- void RecordBootStringPatch(HLoadString* load_string);
+ void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordBootTypePatch(HLoadClass* load_class);
Label* NewTypeBssEntryPatch(HLoadClass* load_class);
+ void RecordBootStringPatch(HLoadString* load_string);
Label* NewStringBssEntryPatch(HLoadString* load_string);
Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
Label* NewJitRootStringPatch(const DexFile& dex_file,
@@ -603,22 +603,23 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// PC-relative DexCache access info.
ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
- // String patch locations; type depends on configuration (app .bss or boot image PIC).
- ArenaDeque<PatchInfo<Label>> string_patches_;
- // Type patch locations for boot image (always PIC).
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
+ // PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
// Type patch locations for kBssEntry.
ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
-
- // Fixups for jump tables need to be handled specially.
- ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+ // String patch locations; type depends on configuration (app .bss or boot image).
+ ArenaDeque<PatchInfo<Label>> string_patches_;
// Patches for string literals in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_string_patches_;
-
// Patches for class literals in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_class_patches_;
+ // Fixups for jump tables need to be handled specially.
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 0b4dcd30a1..e598e19b67 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -56,6 +56,17 @@ static bool IsInterestingInstruction(HInstruction* instruction) {
return true;
}
+ // Check whether it is safe to move a ConstructorFence.
+ // (Moving a fence is safe only when it protects the new-instance itself, not when it
+ // protects final fields.)
+ if (instruction->IsConstructorFence()) {
+ HConstructorFence* ctor_fence = instruction->AsConstructorFence();
+
+ // A fence with "0" inputs is dead and should've been removed in a prior pass.
+ DCHECK_NE(0u, ctor_fence->InputCount());
+
+ return ctor_fence->GetAssociatedAllocation() != nullptr;
+ }
+
// All other instructions that can throw cannot be moved.
if (instruction->CanThrow()) {
return false;
@@ -134,11 +145,11 @@ static bool ShouldFilterUse(HInstruction* instruction,
HInstruction* user,
const ArenaBitVector& post_dominated) {
if (instruction->IsNewInstance()) {
- return user->IsInstanceFieldSet() &&
+ return (user->IsInstanceFieldSet() || user->IsConstructorFence()) &&
(user->InputAt(0) == instruction) &&
!post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
} else if (instruction->IsNewArray()) {
- return user->IsArraySet() &&
+ return (user->IsArraySet() || user->IsConstructorFence()) &&
(user->InputAt(0) == instruction) &&
!post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
}
@@ -372,7 +383,9 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
// Step (3): Try to move sinking candidates.
for (HInstruction* instruction : move_in_order) {
HInstruction* position = nullptr;
- if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+ if (instruction->IsArraySet()
+ || instruction->IsInstanceFieldSet()
+ || instruction->IsConstructorFence()) {
if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
// A store can trivially move, but it can safely do so only if the heap
// location it stores to can also move.
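
The ConstructorFence cases above let a fence sink together with the allocation it protects, as long as the only escaping use (the publication) also moves to the uncommon path; a loose C++ analogy of why that is sound (illustration only — the real reasoning happens on the HIR level):

    #include <atomic>

    struct Box { int payload; };
    std::atomic<Box*> g_uncommon_sink{nullptr};

    void MaybePublish(bool rare_condition) {
      // The allocation and its fence may legally sink into the branch, because the
      // only store that makes the object visible to other threads is on that branch.
      if (rare_condition) {
        Box* b = new Box{42};
        std::atomic_thread_fence(std::memory_order_release);  // fence travels with the allocation
        g_uncommon_sink.store(b, std::memory_order_relaxed);
      }
    }
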
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4ba5c5580f..fe25b7690d 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
#endif
};
- for (auto test_config : test_config_candidates) {
+ for (const CodegenTargetConfig& test_config : test_config_candidates) {
if (CanExecute(test_config.GetInstructionSet())) {
v.push_back(test_config);
}
@@ -76,7 +76,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
static void TestCode(const uint16_t* data,
bool has_result = false,
int32_t expected = 0) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data);
@@ -89,7 +89,7 @@ static void TestCode(const uint16_t* data,
static void TestCodeLong(const uint16_t* data,
bool has_result,
int64_t expected) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
@@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
//
// Assertion failed (!available->IsEmpty())
//
- // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+ // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable,
+ // because of the following situation:
+ //
+ // 1. a temp register (IP0) is allocated as a scratch register by
+ // the parallel move resolver to solve a cycle (swap):
+ //
+ // [ source=DS0 destination=DS257 type=PrimDouble instruction=null ]
+ // [ source=DS257 destination=DS0 type=PrimDouble instruction=null ]
+ //
+ // 2. within CodeGeneratorARM64::MoveLocation, another temp
+ // register (IP1) is allocated to generate the swap between two
+ // double stack slots;
+ //
+ // 3. VIXL requires a third temp register to emit the `Ldr` or
+ // `Str` operation from CodeGeneratorARM64::MoveLocation (as
+ // one of the stack slots' offsets cannot be encoded as an
+ // immediate), but the pool of (core) temp registers is now
+ // empty.
+ //
+ // The solution used so far is to use a floating-point temp register
+ // (D31) in step #2, so that IP1 is available for step #3.
+
HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
move->AddMove(Location::DoubleStackSlot(0),
Location::DoubleStackSlot(257),
@@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
InternalCodeAllocator code_allocator;
codegen.Finalize(&code_allocator);
}
-
#endif
#ifdef ART_ENABLE_CODEGEN_mips
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 31cd204c9f..00a16fe849 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -243,7 +243,7 @@ static void ValidateGraph(HGraph* graph) {
GraphChecker graph_checker(graph);
graph_checker.Run();
if (!graph_checker.IsValid()) {
- for (const auto& error : graph_checker.GetErrors()) {
+ for (const std::string& error : graph_checker.GetErrors()) {
std::cout << error << std::endl;
}
}
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 34b52a87b5..aea901dec7 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -338,14 +338,21 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
// Ensure the inputs of `instruction` are defined in a block of the graph.
for (HInstruction* input : instruction->GetInputs()) {
- const HInstructionList& list = input->IsPhi()
- ? input->GetBlock()->GetPhis()
- : input->GetBlock()->GetInstructions();
- if (!list.Contains(input)) {
- AddError(StringPrintf("Input %d of instruction %d is not defined "
- "in a basic block of the control-flow graph.",
+ if (input->GetBlock() == nullptr) {
+ AddError(StringPrintf("Input %d of instruction %d is not in any "
+ "basic block of the control-flow graph.",
input->GetId(),
instruction->GetId()));
+ } else {
+ const HInstructionList& list = input->IsPhi()
+ ? input->GetBlock()->GetPhis()
+ : input->GetBlock()->GetInstructions();
+ if (!list.Contains(input)) {
+ AddError(StringPrintf("Input %d of instruction %d is not defined "
+ "in a basic block of the control-flow graph.",
+ input->GetId(),
+ instruction->GetId()));
+ }
}
}
@@ -497,8 +504,7 @@ void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
"has a null pointer as last input.",
invoke->DebugName(),
invoke->GetId()));
- }
- if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
+ } else if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
"has a last instruction (%s:%d) which is neither a clinit check "
"nor a load class instruction.",
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e5d94c3504..02816cf7ce 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -514,6 +514,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
}
+ void VisitVecMin(HVecMin* min) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
+ }
+
+ void VisitVecMax(HVecMax* max) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
+ }
+
void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
}
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c93bc210be..8ea312d0ea 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -516,13 +516,13 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
- for (auto dominated_block : block->GetDominatedBlocks()) {
+ for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
return true;
}
}
- for (auto successor : block->GetSuccessors()) {
+ for (const HBasicBlock* successor : block->GetSuccessors()) {
if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
return true;
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 4f6ca17de0..142c95780e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -140,6 +140,14 @@ void HInliner::Run() {
DCHECK_NE(total_number_of_instructions_, 0u);
DCHECK_NE(inlining_budget_, 0u);
+ // If we're compiling with a core image (which is only used for
+ // test purposes), honor inlining directives in method names:
+ // - if a method's name contains the substring "$inline$", ensure
+ // that this method is actually inlined;
+ // - if a method's name contains the substring "$noinline$", do not
+ // inline that method.
+ const bool honor_inlining_directives = IsCompilingWithCoreImage();
+
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
DCHECK(!blocks.empty());
@@ -152,7 +160,7 @@ void HInliner::Run() {
HInvoke* call = instruction->AsInvoke();
// As long as the call is not intrinsified, it is worth trying to inline.
if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
- if (kIsDebugBuild && IsCompilingWithCoreImage()) {
+ if (honor_inlining_directives) {
// Debugging case: directives in method names control or assert on inlining.
std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
call->GetDexMethodIndex(), /* with_signature */ false);
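
As context for the hunk above: the directives are literal substrings of the pretty method name. A minimal sketch of the check, assuming only std::string (the helper names are illustrative, not the inliner's API):

#include <string>

// Hypothetical helpers mirroring the substring tests done on callee_name.
bool HasInlineDirective(const std::string& pretty_method) {
  return pretty_method.find("$inline$") != std::string::npos;
}

bool HasNoinlineDirective(const std::string& pretty_method) {
  return pretty_method.find("$noinline$") != std::string::npos;
}

// For example, HasNoinlineDirective("void Main.$noinline$foo()") is true, so a
// core-image (test) compilation must skip inlining that call, while "$inline$"
// methods are asserted to actually get inlined.
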
@@ -1501,8 +1509,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
}
}
if (needs_constructor_barrier) {
- HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
- invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
+ // See CompilerDriver::RequiresConstructorBarrier for more details.
+ DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence";
+
+ HConstructorFence* constructor_fence =
+ new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena());
+ invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence,
+ invoke_instruction);
}
*return_replacement = nullptr;
break;
@@ -1870,7 +1883,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph,
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
HConstantFolding fold(callee_graph, "constant_folding$inliner");
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
- InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
+ InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_);
IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
HOptimization* optimizations[] = {
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 978c6a2d71..df9e7164ed 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -451,10 +451,13 @@ void HInstructionBuilder::InitializeParameters() {
referrer_method_id.class_idx_,
parameter_index++,
Primitive::kPrimNot,
- true);
+ /* is_this */ true);
AppendInstruction(parameter);
UpdateLocal(locals_index++, parameter);
number_of_parameters--;
+ current_this_parameter_ = parameter;
+ } else {
+ DCHECK(current_this_parameter_ == nullptr);
}
const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
@@ -465,7 +468,7 @@ void HInstructionBuilder::InitializeParameters() {
arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
parameter_index++,
Primitive::GetType(shorty[shorty_pos]),
- false);
+ /* is_this */ false);
++shorty_pos;
AppendInstruction(parameter);
// Store the parameter value in the local that the dex code will use
@@ -588,6 +591,8 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
}
+// Does the method being compiled need any constructor barriers to be inserted?
+// (Always 'false' for methods that aren't <init>.)
static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
// Can be null in unit tests only.
if (UNLIKELY(cu == nullptr)) {
@@ -596,6 +601,11 @@ static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDri
Thread* self = Thread::Current();
return cu->IsConstructor()
+ && !cu->IsStatic()
+ // RequiresConstructorBarrier must only be queried for <init> methods;
+ // it's effectively "false" for every other method.
+ //
+      // See CompilerDriver::RequiresConstructorBarrier for more explanation.
&& driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
}
@@ -639,13 +649,24 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction,
Primitive::Type type,
uint32_t dex_pc) {
if (type == Primitive::kPrimVoid) {
+    // Only <init> (which always returns void) could possibly have a constructor fence.
// This may insert additional redundant constructor fences from the super constructors.
// TODO: remove redundant constructor fences (b/36656456).
if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
- AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+ // Compiling instance constructor.
+ if (kIsDebugBuild) {
+ std::string method_name = graph_->GetMethodName();
+ CHECK_EQ(std::string("<init>"), method_name);
+ }
+
+ HInstruction* fence_target = current_this_parameter_;
+ DCHECK(fence_target != nullptr);
+
+ AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
}
AppendInstruction(new (arena_) HReturnVoid(dex_pc));
} else {
+ DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_));
HInstruction* value = LoadLocal(instruction.VRegA(), type);
AppendInstruction(new (arena_) HReturn(value, dex_pc));
}
@@ -941,7 +962,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction
false /* is_unresolved */);
}
-bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
+HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
HLoadClass* load_class = BuildLoadClass(type_index, dex_pc);
@@ -965,14 +986,65 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d
// Consider classes we haven't resolved as potentially finalizable.
bool finalizable = (klass == nullptr) || klass->IsFinalizable();
- AppendInstruction(new (arena_) HNewInstance(
+ HNewInstance* new_instance = new (arena_) HNewInstance(
cls,
dex_pc,
type_index,
*dex_compilation_unit_->GetDexFile(),
finalizable,
- entrypoint));
- return true;
+ entrypoint);
+ AppendInstruction(new_instance);
+
+ return new_instance;
+}
+
+void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) {
+ DCHECK(allocation != nullptr &&
+ (allocation->IsNewInstance() ||
+ allocation->IsNewArray())); // corresponding to "new" keyword in JLS.
+
+ if (allocation->IsNewInstance()) {
+ // STRING SPECIAL HANDLING:
+ // -------------------------------
+    // Strings have a real HNewInstance node, but it always ends up with 0 uses:
+    // every use of a String HNewInstance is rewritten to take the result of the
+    // StringFactory invoke as its input instead.
+ //
+ // Do not emit an HConstructorFence here since it can inhibit some String new-instance
+ // optimizations (to pass checker tests that rely on those optimizations).
+ HNewInstance* new_inst = allocation->AsNewInstance();
+ HLoadClass* load_class = new_inst->GetLoadClass();
+
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ StackHandleScope<1> hs(self);
+ Handle<mirror::Class> klass = load_class->GetClass();
+ if (klass != nullptr && klass->IsStringClass()) {
+ return;
+ // Note: Do not use allocation->IsStringAlloc which requires
+      // Note: Do not use allocation->IsStringAlloc, which requires a valid
+      // ReferenceTypeInfo; that is not computed until reference type propagation
+      // (and the instruction builder runs too early).
+ // (In terms of correctness, the StringFactory needs to provide its own
+ // default initialization barrier, see below.)
+ }
+
+ // JLS 17.4.5 "Happens-before Order" describes:
+ //
+ // The default initialization of any object happens-before any other actions (other than
+ // default-writes) of a program.
+ //
+ // In our implementation the default initialization of an object to type T means
+ // setting all of its initial data (object[0..size)) to 0, and setting the
+ // object's class header (i.e. object.getClass() == T.class).
+ //
+ // In practice this fence ensures that the writes to the object header
+ // are visible to other threads if this object escapes the current thread.
+  // (and, in theory, the zero-initialization as well, but that happens
+  //  automatically when new memory pages are mapped in by the OS).
+ HConstructorFence* ctor_fence =
+ new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_);
+ AppendInstruction(ctor_fence);
}
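
For intuition about the fence inserted above: it acts like a release fence between the default initialization of the object and the first store that lets it escape. A minimal C++11 sketch of that publication pattern, assuming plain std::atomic rather than ART runtime types:

#include <atomic>

struct Obj { int field = 42; };

std::atomic<Obj*> g_shared{nullptr};

void Publish() {
  Obj* o = new Obj();                                   // default initialization
  std::atomic_thread_fence(std::memory_order_release);  // analogue of the constructor fence
  g_shared.store(o, std::memory_order_relaxed);         // the object escapes here
}

void Consume() {
  Obj* o = g_shared.load(std::memory_order_acquire);
  if (o != nullptr) {
    // The release fence orders the initializing writes before the escaping
    // store, so this read observes field == 42 and a valid class header.
    int v = o->field;
    (void)v;
  }
}
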
static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
@@ -1501,15 +1573,15 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
graph_->SetHasBoundsChecks(true);
}
-void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
- dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index) {
+HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+ dex::TypeIndex type_index,
+ uint32_t number_of_vreg_arguments,
+ bool is_range,
+ uint32_t* args,
+ uint32_t register_index) {
HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- HInstruction* object = new (arena_) HNewArray(cls, length, dex_pc);
+ HNewArray* const object = new (arena_) HNewArray(cls, length, dex_pc);
AppendInstruction(object);
const char* descriptor = dex_file_->StringByTypeIdx(type_index);
@@ -1529,6 +1601,8 @@ void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
AppendInstruction(aset);
}
latest_result_ = object;
+
+ return object;
}
template <typename T>
@@ -2513,10 +2587,12 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
}
case Instruction::NEW_INSTANCE: {
- if (!BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc)) {
- return false;
- }
+ HNewInstance* new_instance =
+ BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc);
+ DCHECK(new_instance != nullptr);
+
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+ BuildConstructorFenceForAllocation(new_instance);
break;
}
@@ -2524,8 +2600,11 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
dex::TypeIndex type_index(instruction.VRegC_22c());
HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- AppendInstruction(new (arena_) HNewArray(cls, length, dex_pc));
+
+ HNewArray* new_array = new (arena_) HNewArray(cls, length, dex_pc);
+ AppendInstruction(new_array);
UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+ BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2534,7 +2613,13 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
dex::TypeIndex type_index(instruction.VRegB_35c());
uint32_t args[5];
instruction.GetVarArgs(args);
- BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc,
+ type_index,
+ number_of_vreg_arguments,
+ /* is_range */ false,
+ args,
+ /* register_index */ 0);
+ BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2542,8 +2627,13 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
dex::TypeIndex type_index(instruction.VRegB_3rc());
uint32_t register_index = instruction.VRegC_3rc();
- BuildFilledNewArray(
- dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc,
+ type_index,
+ number_of_vreg_arguments,
+ /* is_range */ true,
+                                                 /* args */ nullptr,
+ register_index);
+ BuildConstructorFenceForAllocation(new_array);
break;
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 7fdc1883ca..e968760d84 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -62,6 +62,7 @@ class HInstructionBuilder : public ValueObject {
current_block_(nullptr),
current_locals_(nullptr),
latest_result_(nullptr),
+ current_this_parameter_(nullptr),
compiler_driver_(driver),
code_generator_(code_generator),
dex_compilation_unit_(dex_compilation_unit),
@@ -193,12 +194,12 @@ class HInstructionBuilder : public ValueObject {
uint32_t register_index);
// Builds a new array node and the instructions that fill it.
- void BuildFilledNewArray(uint32_t dex_pc,
- dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index);
+ HNewArray* BuildFilledNewArray(uint32_t dex_pc,
+ dex::TypeIndex type_index,
+ uint32_t number_of_vreg_arguments,
+ bool is_range,
+ uint32_t* args,
+ uint32_t register_index);
void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
@@ -287,7 +288,11 @@ class HInstructionBuilder : public ValueObject {
REQUIRES_SHARED(Locks::mutator_lock_);
// Build a HNewInstance instruction.
- bool BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc);
+ HNewInstance* BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc);
+
+ // Build a HConstructorFence for HNewInstance and HNewArray instructions. This ensures the
+ // happens-before ordering for default-initialization of the object referred to by new_instance.
+ void BuildConstructorFenceForAllocation(HInstruction* allocation);
// Return whether the compiler can assume `cls` is initialized.
bool IsInitialized(Handle<mirror::Class> cls) const
@@ -325,6 +330,11 @@ class HInstructionBuilder : public ValueObject {
HBasicBlock* current_block_;
ArenaVector<HInstruction*>* current_locals_;
HInstruction* latest_result_;
+ // Current "this" parameter.
+ // Valid only after InitializeParameters() finishes.
+ // * Null for static methods.
+ // * Non-null for instance methods.
+ HParameterValue* current_this_parameter_;
CompilerDriver* const compiler_driver_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2dcc12e294..d14716601c 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -30,9 +30,11 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
public:
InstructionSimplifierVisitor(HGraph* graph,
CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
OptimizingCompilerStats* stats)
: HGraphDelegateVisitor(graph),
codegen_(codegen),
+ compiler_driver_(compiler_driver),
stats_(stats) {}
void Run();
@@ -119,6 +121,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
CodeGenerator* codegen_;
+ CompilerDriver* compiler_driver_;
OptimizingCompilerStats* stats_;
bool simplification_occurred_ = false;
int simplifications_at_current_position_ = 0;
@@ -130,7 +133,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
};
void InstructionSimplifier::Run() {
- InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
+ InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_);
visitor.Run();
}
@@ -257,7 +260,8 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
if (shift_amount->IsConstant()) {
int64_t cst = Int64FromConstant(shift_amount->AsConstant());
- if ((cst & implicit_mask) == 0) {
+ int64_t masked_cst = cst & implicit_mask;
+ if (masked_cst == 0) {
// Replace code looking like
// SHL dst, value, 0
// with
@@ -266,6 +270,17 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
instruction->GetBlock()->RemoveInstruction(instruction);
RecordSimplification();
return;
+ } else if (masked_cst != cst) {
+ // Replace code looking like
+ // SHL dst, value, cst
+ // where cst exceeds maximum distance with the equivalent
+ // SHL dst, value, cst & implicit_mask
+ // (as defined by shift semantics). This ensures other
+ // optimizations do not need to special case for such situations.
+ DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt);
+ instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1);
+ RecordSimplification();
+ return;
}
}
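
The rewrite above leans on dex shift semantics: a 32-bit shift uses only the low five bits of the distance. A standalone sketch of that equivalence (not ART code):

#include <cstdint>

// value << distance with Java/dex int semantics: only the low 5 bits of the
// distance matter, so (value << 33) behaves exactly like (value << 1).
int32_t ShlInt(int32_t value, int64_t distance) {
  int64_t masked = distance & 31;  // the implicit_mask for a 32-bit shift
  return static_cast<int32_t>(static_cast<uint32_t>(value) << masked);
}

// ShlInt(1, 33) == 2 == ShlInt(1, 1), which is why the simplifier can replace
// an out-of-range constant distance with the masked one up front.
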
@@ -1884,7 +1899,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
// the invoke, as we would need to look it up in the current dex file, and it
// is unlikely that it exists. The most usual situation for such typed
// arraycopy methods is a direct pointer to the boot image.
- HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+ HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_);
}
}
}
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index f7329a4a1f..5e2045580b 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -24,6 +24,7 @@
namespace art {
class CodeGenerator;
+class CompilerDriver;
/**
* Implements optimizations specific to each instruction.
@@ -37,12 +38,14 @@ class CodeGenerator;
*/
class InstructionSimplifier : public HOptimization {
public:
- explicit InstructionSimplifier(HGraph* graph,
- CodeGenerator* codegen,
- OptimizingCompilerStats* stats = nullptr,
- const char* name = kInstructionSimplifierPassName)
+ InstructionSimplifier(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kInstructionSimplifierPassName)
: HOptimization(graph, name, stats),
- codegen_(codegen) {}
+ codegen_(codegen),
+ compiler_driver_(compiler_driver) {}
static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
@@ -50,6 +53,7 @@ class InstructionSimplifier : public HOptimization {
private:
CodeGenerator* codegen_;
+ CompilerDriver* compiler_driver_;
DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
};
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index f16e3727c8..311be1fb49 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -216,5 +216,18 @@ void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
}
}
+void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
+ if (!instruction->IsStringCharAt()
+ && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
+ if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index eec4e49792..8596f6ad40 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -75,6 +75,8 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
void VisitVecMul(HVecMul* instruction) OVERRIDE;
+ void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
+ void VisitVecStore(HVecStore* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 7d1f146587..e5a8499ff4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -16,6 +16,8 @@
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
+
namespace art {
namespace {
@@ -247,6 +249,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
access->GetType() == Primitive::kPrimNot) {
// For object arrays, the read barrier instrumentation requires
// the original array pointer.
+ // TODO: This can be relaxed for Baker CC.
return false;
}
@@ -345,4 +348,59 @@ bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
return false;
}
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
+ if (index->IsConstant()) {
+    // If the index is constant, the whole address calculation can often be done by the
+    // LDR/STR instructions themselves.
+    // TODO: Handle the case of a constant that cannot be embedded in the instruction.
+ return false;
+ }
+
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+ Primitive::Type packed_type = access->GetPackedType();
+ uint32_t data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(packed_type)).Uint32Value();
+ size_t component_shift = Primitive::ComponentSizeShift(packed_type);
+
+ bool is_extracting_beneficial = false;
+  // Extracting the intermediate address is beneficial only if the index has at least 2 users
+  // that can share it (i.e. the same data offset and component shift).
+ for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (user->IsVecMemoryOperation() && user != access) {
+ HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
+ Primitive::Type another_packed_type = another_access->GetPackedType();
+ uint32_t another_data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(another_packed_type)).Uint32Value();
+ size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type);
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ } else if (user->IsIntermediateAddressIndex()) {
+ HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
+ uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
+ size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ }
+ }
+
+ if (!is_extracting_beneficial) {
+ return false;
+ }
+
+ // Proceed to extract the index + data_offset address computation.
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntConstant* shift = graph->GetIntConstant(component_shift);
+ HIntermediateAddressIndex* address =
+ new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);
+
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 1);
+
+ return true;
+}
+
} // namespace art
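
The quantity being factored out is data_offset + (index << shift); once at least two accesses agree on the offset and shift, they can share a single computation of it. A rough sketch of the arithmetic with illustrative constants (not ART's IR):

#include <cstdint>

// What HIntermediateAddressIndex materializes once for reuse.
uintptr_t BiasedIndex(uintptr_t index, uint32_t data_offset, uint32_t shift) {
  return data_offset + (index << shift);
}

void TwoVecAccesses(uintptr_t array_a, uintptr_t array_b, uintptr_t index) {
  const uint32_t kDataOffset = 16;  // assumed array data offset for the sketch
  const uint32_t kShift = 2;        // log2 of the packed element size
  uintptr_t biased = BiasedIndex(index, kDataOffset, kShift);
  // Both the vector load and the vector store address off the same value.
  uintptr_t load_address = array_a + biased;
  uintptr_t store_address = array_b + biased;
  (void)load_address;
  (void)store_address;
}
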
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 2ea103a518..371619fa2e 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -59,6 +59,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
size_t data_offset);
bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
} // namespace art
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 6236bd87ab..b664d41013 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -25,7 +25,7 @@
#include "mirror/dex_cache-inl.h"
#include "nodes.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils.h"
namespace art {
@@ -146,7 +146,7 @@ void IntrinsicsRecognizer::Run() {
Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
if (!CheckInvokeType(intrinsic, invoke)) {
LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
- << intrinsic << " for "
+ << static_cast<uint32_t>(intrinsic) << " for "
<< art_method->PrettyMethod()
<< invoke->DebugName();
} else {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 750f9cc213..ae5f8d1760 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -28,7 +28,7 @@
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils/arm/assembler_arm.h"
namespace art {
@@ -1010,17 +1010,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* code
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ codegen->UpdateReferenceFieldWithBakerReadBarrier(
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
- ScaleFactor::TIMES_1,
+ /* field_offset */ offset_loc,
tmp_ptr_loc,
/* needs_null_check */ false,
- /* always_update_field */ true,
- &tmp);
+ tmp);
}
}
@@ -1648,6 +1645,8 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
@@ -2599,11 +2598,7 @@ void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
// We don't care about the sign bit, so shift left.
__ Lsl(out, out, 1);
__ eor(out, out, ShifterOperand(infinity));
- // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
- __ clz(out, out);
- // Any number less than 32 logically shifted right by 5 bits results in 0;
- // the same operation on 32 yields 1.
- __ Lsr(out, out, 5);
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out);
}
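
Both the removed clz/lsr pair and the new GenerateConditionWithZero call just materialize out == 0 after the xor. A bit-level sketch of the full isInfinite test as a plain helper (not the ARM code generator):

#include <cstdint>
#include <cstring>

bool FloatIsInfinite(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  uint32_t shifted = bits << 1;          // drop the sign bit, like Lsl(out, out, 1)
  uint32_t infinity = 0x7f800000u << 1;  // +/- infinity with the sign bit dropped
  uint32_t out = shifted ^ infinity;     // zero iff f is +infinity or -infinity
  return out == 0;                       // the condition GenerateConditionWithZero(kCondEQ) emits
}
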
void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -2626,63 +2621,7 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
__ eor(out, out, ShifterOperand(infinity_high2));
// We don't care about the sign bit, so shift left.
__ orr(out, IP, ShifterOperand(out, LSL, 1));
- // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
- __ clz(out, out);
- // Any number less than 32 logically shifted right by 5 bits results in 0;
- // the same operation on 32 yields 1.
- __ Lsr(out, out, 5);
-}
-
-void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier) {
- // Do not intrinsify this call with the read barrier configuration.
- return;
- }
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnSlowPath,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
- DCHECK(!kEmitCompilerReadBarrier);
- ArmAssembler* const assembler = GetAssembler();
- LocationSummary* locations = invoke->GetLocations();
-
- Register obj = locations->InAt(0).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
-
- // Load ArtMethod first.
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(invoke_direct != nullptr);
- Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall(
- invoke_direct, locations->GetTemp(0)).AsRegister<Register>();
-
- // Now get declaring class.
- __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
- uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
- DCHECK_NE(slow_path_flag_offset, 0u);
- DCHECK_NE(disable_flag_offset, 0u);
- DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
- // Check static flags that prevent using intrinsic.
- __ ldr(IP, Address(temp, disable_flag_offset));
- __ ldr(temp, Address(temp, slow_path_flag_offset));
- __ orr(IP, IP, ShifterOperand(temp));
- __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
-
- // Fast path.
- __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
- codegen_->MaybeRecordImplicitNullCheck(invoke);
- __ MaybeUnpoisonHeapReference(out);
- __ Bind(slow_path->GetExitLabel());
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out);
}
void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
@@ -2754,6 +2693,30 @@ void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
}
}
+void IntrinsicLocationsBuilderARM::VisitThreadInterrupted(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitThreadInterrupted(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+ int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
+ __ LoadFromOffset(kLoadWord, out, TR, offset);
+ Label done;
+ Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+ __ CompareAndBranchIfZero(out, final_label);
+ __ dmb(ISH);
+ __ LoadImmediate(IP, 0);
+ __ StoreToOffset(kStoreWord, IP, TR, offset);
+ __ dmb(ISH);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
@@ -2767,6 +2730,7 @@ UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble) // Could be done by changing rou
UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4d360158a2..990a773a95 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -28,7 +28,7 @@
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"
using namespace vixl::aarch64; // NOLINT(build/namespaces)
@@ -1154,17 +1154,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co
Register temp = WRegisterFrom(locations->GetTemp(0));
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ codegen->UpdateReferenceFieldWithBakerReadBarrier(
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
- /* scale_factor */ 0u,
+ /* field_offset */ offset_loc,
temp,
/* needs_null_check */ false,
- /* use_load_acquire */ false,
- /* always_update_field */ true);
+ /* use_load_acquire */ false);
}
}
@@ -2900,69 +2897,6 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}
-void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier) {
- // Do not intrinsify this call with the read barrier configuration.
- return;
- }
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnSlowPath,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
- DCHECK(!kEmitCompilerReadBarrier);
- MacroAssembler* masm = GetVIXLAssembler();
- LocationSummary* locations = invoke->GetLocations();
-
- Register obj = InputRegisterAt(invoke, 0);
- Register out = OutputRegister(invoke);
-
- SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
- codegen_->AddSlowPath(slow_path);
-
- // Load ArtMethod first.
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(invoke_direct != nullptr);
- Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
- invoke_direct, locations->GetTemp(0)));
-
- // Now get declaring class.
- __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
- uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
- DCHECK_NE(slow_path_flag_offset, 0u);
- DCHECK_NE(disable_flag_offset, 0u);
- DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
- // Check static flags that prevent using intrinsic.
- if (slow_path_flag_offset == disable_flag_offset + 1) {
- // Load two adjacent flags in one 64-bit load.
- __ Ldr(temp0, MemOperand(temp0, disable_flag_offset));
- } else {
- UseScratchRegisterScope temps(masm);
- Register temp1 = temps.AcquireW();
- __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset));
- __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset));
- __ Orr(temp0, temp1, temp0);
- }
- __ Cbnz(temp0, slow_path->GetEntryLabel());
-
- {
- // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
- vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- // Fast path.
- __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
- codegen_->MaybeRecordImplicitNullCheck(invoke);
- }
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
- __ Bind(slow_path->GetExitLabel());
-}
-
void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
InvokeRuntimeCallingConvention calling_convention;
IntrinsicVisitor::ComputeIntegerValueOfLocations(
@@ -3036,6 +2970,29 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
}
}
+void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
+ MacroAssembler* masm = GetVIXLAssembler();
+ Register out = RegisterFrom(invoke->GetLocations()->Out(), Primitive::kPrimInt);
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireX();
+
+ __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
+ __ Ldar(out.W(), MemOperand(temp));
+
+ vixl::aarch64::Label done;
+ __ Cbz(out.W(), &done);
+ __ Stlr(wzr, MemOperand(temp));
+ __ Bind(&done);
+}
+
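
Functionally the intrinsic is a test-and-clear of the thread's interrupted flag with acquire/release ordering (Ldar/Stlr here, dmb-fenced accesses in the ARM variants). A minimal sketch using std::atomic as a stand-in for the real Thread field:

#include <atomic>
#include <cstdint>

std::atomic<int32_t> interrupted{0};  // stand-in for the per-thread flag

int32_t TestAndClearInterrupted() {
  int32_t value = interrupted.load(std::memory_order_acquire);  // Ldar
  if (value != 0) {
    // Only clear when the flag was observed set, matching the Cbz fast path.
    interrupted.store(0, std::memory_order_release);            // Stlr
  }
  return value;
}
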
+UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 3c53517b28..ff59ce9658 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -24,7 +24,8 @@ namespace aarch64 {
class MacroAssembler;
-}} // namespace vixl::aarch64
+} // namespace aarch64
+} // namespace vixl
namespace art {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index fd8a37ae05..0e04b9a950 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -26,7 +26,7 @@
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "aarch32/constants-aarch32.h"
@@ -1347,17 +1347,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL*
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ codegen->UpdateReferenceFieldWithBakerReadBarrier(
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
- ScaleFactor::TIMES_1,
+ /* field_offset */ offset_loc,
tmp_ptr_loc,
/* needs_null_check */ false,
- /* always_update_field */ true,
- &tmp);
+ tmp);
}
}
@@ -2026,6 +2023,8 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
@@ -2972,11 +2971,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
// We don't care about the sign bit, so shift left.
__ Lsl(out, out, 1);
__ Eor(out, out, infinity);
- // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
- __ Clz(out, out);
- // Any number less than 32 logically shifted right by 5 bits results in 0;
- // the same operation on 32 yields 1.
- __ Lsr(out, out, 5);
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -3002,65 +2997,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
__ Eor(out, out, infinity_high2);
// We don't care about the sign bit, so shift left.
__ Orr(out, temp, Operand(out, vixl32::LSL, 1));
- // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
- __ Clz(out, out);
- // Any number less than 32 logically shifted right by 5 bits results in 0;
- // the same operation on 32 yields 1.
- __ Lsr(out, out, 5);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier) {
- // Do not intrinsify this call with the read barrier configuration.
- return;
- }
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnSlowPath,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
- DCHECK(!kEmitCompilerReadBarrier);
- ArmVIXLAssembler* assembler = GetAssembler();
- LocationSummary* locations = invoke->GetLocations();
-
- vixl32::Register obj = InputRegisterAt(invoke, 0);
- vixl32::Register out = OutputRegister(invoke);
-
- SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
- codegen_->AddSlowPath(slow_path);
-
- // Load ArtMethod first.
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(invoke_direct != nullptr);
- vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
- invoke_direct, locations->GetTemp(0)));
-
- // Now get declaring class.
- __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
- uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
- DCHECK_NE(slow_path_flag_offset, 0u);
- DCHECK_NE(disable_flag_offset, 0u);
- DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
- // Check static flags that prevent using intrinsic.
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- vixl32::Register temp1 = temps.Acquire();
- __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
- __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
- __ Orr(temp0, temp1, temp0);
- __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());
-
- // Fast path.
- __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
- codegen_->MaybeRecordImplicitNullCheck(invoke);
- assembler->MaybeUnpoisonHeapReference(out);
- __ Bind(slow_path->GetExitLabel());
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
@@ -3136,7 +3073,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
__ Add(out, in, -info.low);
__ Cmp(out, info.high - info.low + 1);
vixl32::Label allocate, done;
- __ B(hs, &allocate);
+ __ B(hs, &allocate, /* is_far_target */ false);
// If the value is within the bounds, load the j.l.Integer directly from the array.
uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
@@ -3158,9 +3095,36 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
}
}
+void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
+ ArmVIXLAssembler* assembler = GetAssembler();
+ vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
+ int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
+ __ Ldr(out, MemOperand(tr, offset));
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ vixl32::Label done;
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
+ __ Dmb(vixl32::ISH);
+ __ Mov(temp, 0);
+ assembler->StoreToOffset(kStoreWord, temp, tr, offset);
+ __ Dmb(vixl32::ISH);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 41df56b514..ea3e9e5ec9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -23,6 +23,7 @@
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/mips/assembler_mips.h"
#include "utils/mips/constants_mips.h"
@@ -32,7 +33,7 @@ namespace art {
namespace mips {
IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen)
- : arena_(codegen->GetGraph()->GetArena()) {
+ : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) {
}
MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() {
@@ -1525,6 +1526,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
@@ -2552,101 +2556,110 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
Register out = locations->Out().AsRegister<Register>();
MipsLabel done;
- MipsLabel finite;
- MipsLabel add;
- // if (in.isNaN) {
- // return 0;
- // }
- //
- // out = floor.w.s(in);
- //
- // /*
- // * This "if" statement is only needed for the pre-R6 version of floor.w.s
- // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
- // * too large to fit in a 32-bit integer.
- // *
- // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
- // * numbers which are too large to be represented in a 32-bit signed
- // * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
- // * and will no longer be processed by this "if" statement.
- // */
- // if (out == Integer.MAX_VALUE) {
- // TMP = (in < 0.0f) ? 1 : 0;
- // /*
- // * If TMP is 1, then adding it to out will wrap its value from
- // * Integer.MAX_VALUE to Integer.MIN_VALUE.
- // */
- // return out += TMP;
- // }
- //
- // /*
- // * For negative values not handled by the previous "if" statement the
- // * test here will correctly set the value of TMP.
- // */
- // TMP = ((in - out) >= 0.5f) ? 1 : 0;
- // return out += TMP;
-
- // Test for NaN.
if (IsR6()) {
- __ CmpUnS(FTMP, in, in);
+ // out = floor(in);
+ //
+ // if (out != MAX_VALUE && out != MIN_VALUE) {
+ // TMP = ((in - out) >= 0.5) ? 1 : 0;
+ // return out += TMP;
+ // }
+ // return out;
+
+ // out = floor(in);
+ __ FloorWS(FTMP, in);
+ __ Mfc1(out, FTMP);
+
+ // if (out != MAX_VALUE && out != MIN_VALUE)
+ __ Addiu(TMP, out, 1);
+ __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001
+ // or out - 0x7FFF FFFF.
+ // IOW, TMP = 1 if out = Int.MIN_VALUE
+ // or TMP = 0 if out = Int.MAX_VALUE.
+ __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE
+ // or out = Int.MAX_VALUE.
+ __ Beqz(TMP, &done);
+
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
+ __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
+ __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+ __ SubS(FTMP, in, FTMP);
+ __ Mtc1(AT, half);
+
+ __ CmpLeS(FTMP, half, FTMP);
+ __ Mfc1(TMP, FTMP);
+
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
} else {
+ // if (in.isNaN) {
+ // return 0;
+ // }
+ //
+ // out = floor.w.s(in);
+ //
+ // /*
+ // * This "if" statement is only needed for the pre-R6 version of floor.w.s
+ // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+ // * too large to fit in a 32-bit integer.
+ // */
+ // if (out == Integer.MAX_VALUE) {
+ // TMP = (in < 0.0f) ? 1 : 0;
+ // /*
+ // * If TMP is 1, then adding it to out will wrap its value from
+ // * Integer.MAX_VALUE to Integer.MIN_VALUE.
+ // */
+ // return out += TMP;
+ // }
+ //
+ // /*
+ // * For negative values not handled by the previous "if" statement the
+ // * test here will correctly set the value of TMP.
+ // */
+ // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+ // return out += TMP;
+
+ MipsLabel finite;
+ MipsLabel add;
+
+ // Test for NaN.
__ CunS(in, in);
- }
- // Return zero for NaN.
- __ Move(out, ZERO);
- if (IsR6()) {
- __ Bc1nez(FTMP, &done);
- } else {
+ // Return zero for NaN.
+ __ Move(out, ZERO);
__ Bc1t(&done);
- }
- // out = floor(in);
- __ FloorWS(FTMP, in);
- __ Mfc1(out, FTMP);
+ // out = floor(in);
+ __ FloorWS(FTMP, in);
+ __ Mfc1(out, FTMP);
- if (!IsR6()) {
__ LoadConst32(TMP, -1);
- }
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0;
- __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
- __ Bne(AT, out, &finite);
+ // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0;
+ __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+ __ Bne(AT, out, &finite);
- __ Mtc1(ZERO, FTMP);
- if (IsR6()) {
- __ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(TMP, FTMP);
- } else {
+ __ Mtc1(ZERO, FTMP);
__ ColtS(in, FTMP);
- }
- __ B(&add);
+ __ B(&add);
- __ Bind(&finite);
+ __ Bind(&finite);
- // TMP = (0.5f <= (in - out)) ? -1 : 0;
- __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
- __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
- __ SubS(FTMP, in, FTMP);
- __ Mtc1(AT, half);
- if (IsR6()) {
- __ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(TMP, FTMP);
- } else {
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
+ __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
+ __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+ __ SubS(FTMP, in, FTMP);
+ __ Mtc1(AT, half);
__ ColeS(half, FTMP);
- }
- __ Bind(&add);
+ __ Bind(&add);
- if (!IsR6()) {
__ Movf(TMP, ZERO);
- }
-
- // Return out -= TMP.
- __ Subu(out, out, TMP);
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
+ }
__ Bind(&done);
}
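
The new R6 sequence is round-half-up built from floor: take floor(in) and add one when the dropped fraction is at least 0.5, unless the conversion already saturated to Integer.MIN_VALUE or MAX_VALUE. A C-level sketch of that logic under those assumptions (standalone, not the emitted MIPS code):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t RoundFloat(float in) {
  // NaN: the hardware conversion yields 0 and the (in - out) compare fails;
  // made explicit here because the C++ cast below would be undefined for NaN.
  if (std::isnan(in)) {
    return 0;
  }
  float f = std::floor(in);
  // Saturate the conversion the way floor.w.s does on R6 for out-of-range input.
  int32_t out;
  if (f >= 2147483648.0f) {
    out = std::numeric_limits<int32_t>::max();
  } else if (f < -2147483648.0f) {
    out = std::numeric_limits<int32_t>::min();
  } else {
    out = static_cast<int32_t>(f);
  }
  // Round up unless the conversion saturated (the Addiu/Aui/Srl/Beqz test above).
  if (out != std::numeric_limits<int32_t>::max() &&
      out != std::numeric_limits<int32_t>::min() &&
      in - f >= 0.5f) {
    out += 1;
  }
  return out;
}
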
@@ -3133,6 +3146,89 @@ void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+// static java.lang.Integer java.lang.Integer.valueOf(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ MipsAssembler* assembler = GetAssembler();
+ InstructionCodeGeneratorMIPS* icodegen =
+ down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor());
+
+ Register out = locations->Out().AsRegister<Register>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ LoadConst32(out, address);
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadConst32(calling_convention.GetRegisterAt(0), address);
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ MipsLabel allocate, done;
+ int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
+
+ // Is (info.low <= in) && (in <= info.high)?
+ __ Addiu32(out, in, -info.low);
+    // As unsigned quantities, is out < (info.high - info.low + 1)?
+ if (IsInt<16>(count)) {
+ __ Sltiu(AT, out, count);
+ } else {
+ __ LoadConst32(AT, count);
+ __ Sltu(AT, out, AT);
+ }
+ // Branch if out >= (info.high - info.low + 1).
+ // This means that "in" is outside of the range [info.low, info.high].
+ __ Beqz(AT, &allocate);
+
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ LoadConst32(TMP, data_offset + address);
+ __ ShiftAndAdd(out, out, TMP, TIMES_4);
+ __ Lw(out, out, 0);
+ __ MaybeUnpoisonHeapReference(out);
+ __ B(&done);
+
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadConst32(calling_convention.GetRegisterAt(0), address);
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
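The new MIPS VisitIntegerValueOf above (and its MIPS64 counterpart later in this patch) mirrors the Integer cache fast path: subtract info.low from the argument, use a single unsigned compare against the cache size to cover both bounds at once, load the boxed object from the boot-image cache array on a hit, and otherwise fall back to kQuickAllocObjectInitialized. For a constant argument the decision is made at compile time instead: an in-range constant embeds the boxed object's address directly, and an out-of-range constant compiles to an unconditional runtime allocation. A minimal C++ sketch of that decision logic, not part of the patch; IntegerCacheSketch and the allocate callback are illustrative stand-ins for the raw-address handling in the generated code:

#include <cstdint>
#include <vector>

struct IntegerCacheSketch {
  int32_t low;
  int32_t high;
  std::vector<const void*> cache;  // Boxed objects, indexed by (value - low); size == high - low + 1.

  const void* ValueOf(int32_t value, const void* (*allocate)(int32_t)) const {
    // Wrapping subtraction, like Addiu32(out, in, -info.low) in the generated code.
    uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low);
    uint32_t count = static_cast<uint32_t>(high) - static_cast<uint32_t>(low) + 1;
    if (index < count) {       // One unsigned compare checks low <= value <= high.
      return cache[index];     // Fast path: load straight from the cache array.
    }
    return allocate(value);    // Slow path: allocate and initialize a new box.
  }
};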
// Unimplemented intrinsics.
UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
@@ -3162,7 +3258,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
+UNIMPLEMENTED_INTRINSIC(MIPS, ThreadInterrupted)
UNREACHABLE_INTRINSICS(MIPS)
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index e134cb882e..eaadad2515 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -49,6 +49,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
bool TryDispatch(HInvoke* invoke);
private:
+ CodeGeneratorMIPS* codegen_;
ArenaAllocator* arena_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b57b41f686..2ecb1a3b02 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -23,6 +23,7 @@
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/mips64/assembler_mips64.h"
#include "utils/mips64/constants_mips64.h"
@@ -32,7 +33,7 @@ namespace art {
namespace mips64 {
IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen)
- : arena_(codegen->GetGraph()->GetArena()) {
+ : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) {
}
Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() {
@@ -890,54 +891,14 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri
DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
Mips64Label done;
- Mips64Label finite;
- Mips64Label add;
- // if (in.isNaN) {
- // return 0;
- // }
- //
// out = floor(in);
//
- // /*
- // * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
- // *
- // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
- // * numbers which are too large to be represented in a 32-/64-bit
- // * signed integer will be processed by floor.X.Y to output
- // * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
- // * processed by this "if" statement.
- // *
- // * However, this bug in the 64-bit MIPS emulator causes the
- // * behavior of floor.X.Y to be the same as pre-R6 implementations
- // * of MIPS64. When that bug is fixed this logic should be amended.
- // */
- // if (out == MAX_VALUE) {
- // TMP = (in < 0.0) ? 1 : 0;
- // /*
- // * If TMP is 1, then adding it to out will wrap its value from
- // * MAX_VALUE to MIN_VALUE.
- // */
+ // if (out != MAX_VALUE && out != MIN_VALUE) {
+ // TMP = ((in - out) >= 0.5) ? 1 : 0;
// return out += TMP;
// }
- //
- // /*
- // * For negative values not handled by the previous "if" statement the
- // * test here will correctly set the value of TMP.
- // */
- // TMP = ((in - out) >= 0.5) ? 1 : 0;
- // return out += TMP;
-
- // Test for NaN.
- if (type == Primitive::kPrimDouble) {
- __ CmpUnD(FTMP, in, in);
- } else {
- __ CmpUnS(FTMP, in, in);
- }
-
- // Return zero for NaN.
- __ Move(out, ZERO);
- __ Bc1nez(FTMP, &done);
+ // return out;
// out = floor(in);
if (type == Primitive::kPrimDouble) {
@@ -948,27 +909,26 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri
__ Mfc1(out, FTMP);
}
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+ // if (out != MAX_VALUE && out != MIN_VALUE)
if (type == Primitive::kPrimDouble) {
- __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+ __ Daddiu(TMP, out, 1);
+ __ Dati(TMP, 0x8000); // TMP = out + 0x8000 0000 0000 0001
+ // or out - 0x7FFF FFFF FFFF FFFF.
+ // IOW, TMP = 1 if out = Long.MIN_VALUE
+ // or TMP = 0 if out = Long.MAX_VALUE.
+ __ Dsrl(TMP, TMP, 1); // TMP = 0 if out = Long.MIN_VALUE
+ // or out = Long.MAX_VALUE.
+ __ Beqzc(TMP, &done);
} else {
- __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+ __ Addiu(TMP, out, 1);
+ __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001
+ // or out - 0x7FFF FFFF.
+ // IOW, TMP = 1 if out = Int.MIN_VALUE
+ // or TMP = 0 if out = Int.MAX_VALUE.
+ __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE
+ // or out = Int.MAX_VALUE.
+ __ Beqzc(TMP, &done);
}
- __ Bnec(AT, out, &finite);
-
- if (type == Primitive::kPrimDouble) {
- __ Dmtc1(ZERO, FTMP);
- __ CmpLtD(FTMP, in, FTMP);
- __ Dmfc1(AT, FTMP);
- } else {
- __ Mtc1(ZERO, FTMP);
- __ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(AT, FTMP);
- }
-
- __ Bc(&add);
-
- __ Bind(&finite);
// TMP = (0.5 <= (in - out)) ? -1 : 0;
if (type == Primitive::kPrimDouble) {
@@ -977,23 +937,21 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri
__ SubD(FTMP, in, FTMP);
__ Dmtc1(AT, half);
__ CmpLeD(FTMP, half, FTMP);
- __ Dmfc1(AT, FTMP);
+ __ Dmfc1(TMP, FTMP);
} else {
__ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
__ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
__ SubS(FTMP, in, FTMP);
__ Mtc1(AT, half);
__ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
}
- __ Bind(&add);
-
// Return out -= TMP.
if (type == Primitive::kPrimDouble) {
- __ Dsubu(out, out, AT);
+ __ Dsubu(out, out, TMP);
} else {
- __ Subu(out, out, AT);
+ __ Subu(out, out, TMP);
}
__ Bind(&done);
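The GenRound rewrite above (like the MIPS change earlier in this patch) drops the explicit NaN test and the finite/add labels: floor is emitted first, and the +1 adjustment is skipped whenever the floored result is already saturated at MIN_VALUE/MAX_VALUE. The saturation test avoids loading a 64-bit constant: adding 1 plus the 0x8000 upper immediate leaves TMP equal to 0 for MAX_VALUE and 1 for MIN_VALUE, so a single logical shift right by one followed by Beqzc covers both cases. A C++ sketch of the intended float semantics, assuming floor.w.s under FCSR.NAN2008=1 produces 0 for NaN and saturates out-of-range inputs (a plain C++ cast would be undefined there, so the sketch models those cases explicitly; RoundFloatSketch is an illustrative name, not part of the patch):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t RoundFloatSketch(float in) {
  int32_t out;
  if (std::isnan(in)) {
    out = 0;                                      // NAN2008 conversion result for NaN.
  } else if (in >= 2147483648.0f) {               // 2^31: positive overflow saturates.
    out = std::numeric_limits<int32_t>::max();
  } else if (in <= -2147483648.0f) {              // -2^31: negative overflow saturates.
    out = std::numeric_limits<int32_t>::min();
  } else {
    out = static_cast<int32_t>(std::floor(in));   // floor.w.s on in-range values.
  }
  // The generated code skips the adjustment when out is already saturated; NaN also
  // falls through harmlessly because (in - out) >= 0.5f is false for NaN.
  if (out != std::numeric_limits<int32_t>::max() &&
      out != std::numeric_limits<int32_t>::min() &&
      in - static_cast<float>(out) >= 0.5f) {
    out += 1;                                     // Round half up.
  }
  return out;
}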
@@ -1168,6 +1126,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
@@ -2564,6 +2525,84 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickTanh);
}
+// java.lang.Integer java.lang.Integer.valueOf(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ Mips64Assembler* assembler = GetAssembler();
+ InstructionCodeGeneratorMIPS64* icodegen =
+ down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor());
+
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ LoadConst64(out, address);
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadConst64(calling_convention.GetRegisterAt(0), address);
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ Mips64Label allocate, done;
+ int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
+
+ // Is (info.low <= in) && (in <= info.high)?
+ __ Addiu32(out, in, -info.low);
+  // As unsigned quantities, is out < (info.high - info.low + 1)?
+ __ LoadConst32(AT, count);
+ // Branch if out >= (info.high - info.low + 1).
+ // This means that "in" is outside of the range [info.low, info.high].
+ __ Bgeuc(out, AT, &allocate);
+
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ LoadConst64(TMP, data_offset + address);
+ __ Dlsa(out, out, TMP, TIMES_4);
+ __ Lwu(out, out, 0);
+ __ MaybeUnpoisonHeapReference(out);
+ __ Bc(&done);
+
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadConst64(calling_convention.GetRegisterAt(0), address);
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
@@ -2583,7 +2622,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
+UNIMPLEMENTED_INTRINSIC(MIPS64, ThreadInterrupted)
UNREACHABLE_INTRINSICS(MIPS64)
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 5b95c26a21..179627ab20 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -49,6 +49,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
bool TryDispatch(HInvoke* invoke);
private:
+ CodeGeneratorMIPS64* codegen_;
ArenaAllocator* arena_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 8e4574774f..a9da15d2ce 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -31,7 +31,7 @@
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
@@ -2819,65 +2819,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke)
GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
-void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier) {
- // Do not intrinsify this call with the read barrier configuration.
- return;
- }
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnSlowPath,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
- DCHECK(!kEmitCompilerReadBarrier);
- LocationSummary* locations = invoke->GetLocations();
- X86Assembler* assembler = GetAssembler();
-
- Register obj = locations->InAt(0).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
- codegen_->AddSlowPath(slow_path);
-
- // Load ArtMethod first.
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(invoke_direct != nullptr);
- Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
- invoke_direct, locations->GetTemp(0));
- DCHECK(temp_loc.Equals(locations->GetTemp(0)));
- Register temp = temp_loc.AsRegister<Register>();
-
- // Now get declaring class.
- __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
- uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
- DCHECK_NE(slow_path_flag_offset, 0u);
- DCHECK_NE(disable_flag_offset, 0u);
- DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
- // Check static flags preventing us for using intrinsic.
- if (slow_path_flag_offset == disable_flag_offset + 1) {
- __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- } else {
- __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- }
-
- // Fast path.
- __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
- codegen_->MaybeRecordImplicitNullCheck(invoke);
- __ MaybeUnpoisonHeapReference(out);
- __ Bind(slow_path->GetExitLabel());
-}
-
static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
return instruction->InputAt(input0) == instruction->InputAt(input1);
}
@@ -3407,7 +3348,29 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
}
}
+void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
+ X86Assembler* assembler = GetAssembler();
+ Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+ Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
+ NearLabel done;
+ __ fs()->movl(out, address);
+ __ testl(out, out);
+ __ j(kEqual, &done);
+ __ fs()->movl(address, Immediate(0));
+ codegen_->MemoryFence();
+ __ Bind(&done);
+}
+
+
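The ThreadInterrupted intrinsic above (and the x86-64 variant later in this patch) reads the thread-local interrupted flag through the segment register and, only when the flag is set, clears it and issues a fence, matching the clear-on-read behaviour of Thread.interrupted(). A rough C++ model of that control flow, using std::atomic in place of the fs:/gs:-relative field; the flag pointer and the ThreadInterruptedSketch name are illustrative only:

#include <atomic>
#include <cstdint>

bool ThreadInterruptedSketch(std::atomic<int32_t>* interrupted_flag) {
  int32_t out = interrupted_flag->load(std::memory_order_relaxed);  // movl out, fs:[offset]
  if (out == 0) {
    return false;                                                   // j kEqual, done
  }
  interrupted_flag->store(0, std::memory_order_relaxed);            // movl fs:[offset], 0
  std::atomic_thread_fence(std::memory_order_seq_cst);              // codegen_->MemoryFence()
  return true;
}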
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8ed2ad86bf..8100645e54 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -31,7 +31,7 @@
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
@@ -759,7 +759,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena,
// We have to ensure that the native code doesn't clobber the XMM registers which are
// non-volatile for ART, but volatile for Native calls. This will ensure that they are
// saved in the prologue and properly restored.
- for (auto fp_reg : non_volatile_xmm_regs) {
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
}
}
@@ -898,7 +898,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
// We have to ensure that the native code doesn't clobber the XMM registers which are
// non-volatile for ART, but volatile for Native calls. This will ensure that they are
// saved in the prologue and properly restored.
- for (auto fp_reg : non_volatile_xmm_regs) {
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
}
}
@@ -2959,65 +2959,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok
GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
-void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier) {
- // Do not intrinsify this call with the read barrier configuration.
- return;
- }
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnSlowPath,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
- DCHECK(!kEmitCompilerReadBarrier);
- LocationSummary* locations = invoke->GetLocations();
- X86_64Assembler* assembler = GetAssembler();
-
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
- codegen_->AddSlowPath(slow_path);
-
- // Load ArtMethod first.
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(invoke_direct != nullptr);
- Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
- invoke_direct, locations->GetTemp(0));
- DCHECK(temp_loc.Equals(locations->GetTemp(0)));
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-
- // Now get declaring class.
- __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
- uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
- DCHECK_NE(slow_path_flag_offset, 0u);
- DCHECK_NE(disable_flag_offset, 0u);
- DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
- // Check static flags preventing us for using intrinsic.
- if (slow_path_flag_offset == disable_flag_offset + 1) {
- __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- } else {
- __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- }
-
- // Fast path.
- __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
- codegen_->MaybeRecordImplicitNullCheck(invoke);
- __ MaybeUnpoisonHeapReference(out);
- __ Bind(slow_path->GetExitLabel());
-}
-
void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
InvokeRuntimeCallingConvention calling_convention;
IntrinsicVisitor::ComputeIntegerValueOfLocations(
@@ -3085,6 +3026,28 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
}
}
+void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
+ X86_64Assembler* assembler = GetAssembler();
+ CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
+ Address address = Address::Absolute
+ (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true);
+ NearLabel done;
+ __ gs()->movl(out, address);
+ __ testl(out, out);
+ __ j(kEqual, &done);
+ __ gs()->movl(address, Immediate(0));
+ codegen_->MemoryFence();
+ __ Bind(&done);
+}
+
+UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
new file mode 100644
index 0000000000..f2ee345c8c
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_analysis.h"
+
+namespace art {
+
+// A cap for the number of heap locations to prevent pathological time/space consumption.
+// The number of heap locations for most of the methods stays below this threshold.
+constexpr size_t kMaxNumberOfHeapLocations = 32;
+
+void LoadStoreAnalysis::Run() {
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ heap_location_collector_.VisitBasicBlock(block);
+ }
+
+ if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
+ // Bail out if there are too many heap locations to deal with.
+ heap_location_collector_.CleanUp();
+ return;
+ }
+ if (!heap_location_collector_.HasHeapStores()) {
+ // Without heap stores, this pass would act mostly as GVN on heap accesses.
+ heap_location_collector_.CleanUp();
+ return;
+ }
+ if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) {
+ // Don't do load/store elimination if the method has volatile field accesses or
+ // monitor operations, for now.
+ // TODO: do it right.
+ heap_location_collector_.CleanUp();
+ return;
+ }
+
+ heap_location_collector_.BuildAliasingMatrix();
+}
+
+} // namespace art
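LoadStoreAnalysis::Run() above bails out (and empties the collector via CleanUp()) when there are more than kMaxNumberOfHeapLocations locations, no heap stores at all, or volatile/monitor operations. The cap bounds the pair-wise aliasing work; a throwaway check of that bound, not part of the patch, using the n*(n-1)/2 pair count:

#include <cstddef>
#include <iostream>

int main() {
  constexpr size_t kMaxNumberOfHeapLocations = 32;  // Same cap as above.
  constexpr size_t max_pairs =
      kMaxNumberOfHeapLocations * (kMaxNumberOfHeapLocations - 1) / 2;
  std::cout << "aliasing pairs at the cap: " << max_pairs << "\n";  // Prints 496.
  return 0;
}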
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
new file mode 100644
index 0000000000..4e940f30bf
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis.h
@@ -0,0 +1,518 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+
+#include "escape.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+// A ReferenceInfo contains additional info about a reference such as
+// whether it's a singleton, returned, etc.
+class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
+ public:
+ ReferenceInfo(HInstruction* reference, size_t pos)
+ : reference_(reference),
+ position_(pos),
+ is_singleton_(true),
+ is_singleton_and_not_returned_(true),
+ is_singleton_and_not_deopt_visible_(true),
+ has_index_aliasing_(false) {
+ CalculateEscape(reference_,
+ nullptr,
+ &is_singleton_,
+ &is_singleton_and_not_returned_,
+ &is_singleton_and_not_deopt_visible_);
+ }
+
+ HInstruction* GetReference() const {
+ return reference_;
+ }
+
+ size_t GetPosition() const {
+ return position_;
+ }
+
+ // Returns true if reference_ is the only name that can refer to its value during
+ // the lifetime of the method. So it's guaranteed to not have any alias in
+ // the method (including its callees).
+ bool IsSingleton() const {
+ return is_singleton_;
+ }
+
+ // Returns true if reference_ is a singleton and not returned to the caller or
+ // used as an environment local of an HDeoptimize instruction.
+ // The allocation and stores into reference_ may be eliminated for such cases.
+ bool IsSingletonAndRemovable() const {
+ return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
+ }
+
+ // Returns true if reference_ is a singleton and returned to the caller or
+ // used as an environment local of an HDeoptimize instruction.
+ bool IsSingletonAndNonRemovable() const {
+ return is_singleton_ &&
+ (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
+ }
+
+ bool HasIndexAliasing() {
+ return has_index_aliasing_;
+ }
+
+ void SetHasIndexAliasing(bool has_index_aliasing) {
+ // Only allow setting to true.
+ DCHECK(has_index_aliasing);
+ has_index_aliasing_ = has_index_aliasing;
+ }
+
+ private:
+ HInstruction* const reference_;
+ const size_t position_; // position in HeapLocationCollector's ref_info_array_.
+
+ // Can only be referred to by a single name in the method.
+ bool is_singleton_;
+ // Is singleton and not returned to caller.
+ bool is_singleton_and_not_returned_;
+ // Is singleton and not used as an environment local of HDeoptimize.
+ bool is_singleton_and_not_deopt_visible_;
+ // Some heap locations with reference_ have array index aliasing,
+ // e.g. arr[i] and arr[j] may be the same location.
+ bool has_index_aliasing_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
+};
+
+// A heap location is a reference-offset/index pair that a value can be loaded from
+// or stored to.
+class HeapLocation : public ArenaObject<kArenaAllocMisc> {
+ public:
+ static constexpr size_t kInvalidFieldOffset = -1;
+
+ // TODO: more fine-grained array types.
+ static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
+
+ HeapLocation(ReferenceInfo* ref_info,
+ size_t offset,
+ HInstruction* index,
+ int16_t declaring_class_def_index)
+ : ref_info_(ref_info),
+ offset_(offset),
+ index_(index),
+ declaring_class_def_index_(declaring_class_def_index),
+ value_killed_by_loop_side_effects_(true) {
+ DCHECK(ref_info != nullptr);
+ DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
+ (offset != kInvalidFieldOffset && index == nullptr));
+ if (ref_info->IsSingleton() && !IsArrayElement()) {
+ // Assume this location's value cannot be killed by loop side effects
+ // until proven otherwise.
+ value_killed_by_loop_side_effects_ = false;
+ }
+ }
+
+ ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
+ size_t GetOffset() const { return offset_; }
+ HInstruction* GetIndex() const { return index_; }
+
+  // Returns the dex class def index of the declaring class.
+  // It's kDeclaringClassDefIndexForArrays for an array element.
+ int16_t GetDeclaringClassDefIndex() const {
+ return declaring_class_def_index_;
+ }
+
+ bool IsArrayElement() const {
+ return index_ != nullptr;
+ }
+
+ bool IsValueKilledByLoopSideEffects() const {
+ return value_killed_by_loop_side_effects_;
+ }
+
+ void SetValueKilledByLoopSideEffects(bool val) {
+ value_killed_by_loop_side_effects_ = val;
+ }
+
+ private:
+ ReferenceInfo* const ref_info_; // reference for instance/static field or array access.
+ const size_t offset_; // offset of static/instance field.
+ HInstruction* const index_; // index of an array element.
+ const int16_t declaring_class_def_index_; // declaring class's def's dex index.
+ bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop
+ // side effects because this location is stored
+ // into inside a loop. This gives
+ // better info on whether a singleton's location
+ // value may be killed by loop side effects.
+
+ DISALLOW_COPY_AND_ASSIGN(HeapLocation);
+};
+
+// A HeapLocationCollector collects all relevant heap locations and keeps
+// an aliasing matrix for all locations.
+class HeapLocationCollector : public HGraphVisitor {
+ public:
+ static constexpr size_t kHeapLocationNotFound = -1;
+ // Start with a single uint32_t word. That's enough bits for pair-wise
+ // aliasing matrix of 8 heap locations.
+ static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
+
+ explicit HeapLocationCollector(HGraph* graph)
+ : HGraphVisitor(graph),
+ ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ aliasing_matrix_(graph->GetArena(),
+ kInitialAliasingMatrixBitVectorSize,
+ true,
+ kArenaAllocLSE),
+ has_heap_stores_(false),
+ has_volatile_(false),
+ has_monitor_operations_(false) {}
+
+ void CleanUp() {
+ heap_locations_.clear();
+ ref_info_array_.clear();
+ }
+
+ size_t GetNumberOfHeapLocations() const {
+ return heap_locations_.size();
+ }
+
+ HeapLocation* GetHeapLocation(size_t index) const {
+ return heap_locations_[index];
+ }
+
+ HInstruction* HuntForOriginalReference(HInstruction* ref) const {
+ DCHECK(ref != nullptr);
+ while (ref->IsNullCheck() || ref->IsBoundType()) {
+ ref = ref->InputAt(0);
+ }
+ return ref;
+ }
+
+ ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
+ for (size_t i = 0; i < ref_info_array_.size(); i++) {
+ ReferenceInfo* ref_info = ref_info_array_[i];
+ if (ref_info->GetReference() == ref) {
+ DCHECK_EQ(i, ref_info->GetPosition());
+ return ref_info;
+ }
+ }
+ return nullptr;
+ }
+
+ bool HasHeapStores() const {
+ return has_heap_stores_;
+ }
+
+ bool HasVolatile() const {
+ return has_volatile_;
+ }
+
+ bool HasMonitorOps() const {
+ return has_monitor_operations_;
+ }
+
+ // Find and return the heap location index in heap_locations_.
+ size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
+ size_t offset,
+ HInstruction* index,
+ int16_t declaring_class_def_index) const {
+ for (size_t i = 0; i < heap_locations_.size(); i++) {
+ HeapLocation* loc = heap_locations_[i];
+ if (loc->GetReferenceInfo() == ref_info &&
+ loc->GetOffset() == offset &&
+ loc->GetIndex() == index &&
+ loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
+ return i;
+ }
+ }
+ return kHeapLocationNotFound;
+ }
+
+ // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
+ bool MayAlias(size_t index1, size_t index2) const {
+ if (index1 < index2) {
+ return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
+ } else if (index1 > index2) {
+ return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
+ } else {
+ DCHECK(false) << "index1 and index2 are expected to be different";
+ return true;
+ }
+ }
+
+ void BuildAliasingMatrix() {
+ const size_t number_of_locations = heap_locations_.size();
+ if (number_of_locations == 0) {
+ return;
+ }
+ size_t pos = 0;
+ // Compute aliasing info between every pair of different heap locations.
+ // Save the result in a matrix represented as a BitVector.
+ for (size_t i = 0; i < number_of_locations - 1; i++) {
+ for (size_t j = i + 1; j < number_of_locations; j++) {
+ if (ComputeMayAlias(i, j)) {
+ aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
+ }
+ pos++;
+ }
+ }
+ }
+
+ private:
+ // An allocation cannot alias with a name which already exists at the point
+ // of the allocation, such as a parameter or a load happening before the allocation.
+ bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+ if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
+ // Any reference that can alias with the allocation must appear after it in the block/in
+ // the block's successors. In reverse post order, those instructions will be visited after
+ // the allocation.
+ return ref_info2->GetPosition() >= ref_info1->GetPosition();
+ }
+ return true;
+ }
+
+ bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+ if (ref_info1 == ref_info2) {
+ return true;
+ } else if (ref_info1->IsSingleton()) {
+ return false;
+ } else if (ref_info2->IsSingleton()) {
+ return false;
+ } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
+ !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
+ return false;
+ }
+ return true;
+ }
+
+ // `index1` and `index2` are indices in the array of collected heap locations.
+ // Returns the position in the bit vector that tracks whether the two heap
+ // locations may alias.
+ size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
+ DCHECK(index2 > index1);
+ const size_t number_of_locations = heap_locations_.size();
+ // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
+ return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
+ }
+
+ // An additional position is passed in to make sure the calculated position is correct.
+ size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
+ size_t calculated_position = AliasingMatrixPosition(index1, index2);
+ DCHECK_EQ(calculated_position, position);
+ return calculated_position;
+ }
+
+ // Compute if two locations may alias to each other.
+ bool ComputeMayAlias(size_t index1, size_t index2) const {
+ HeapLocation* loc1 = heap_locations_[index1];
+ HeapLocation* loc2 = heap_locations_[index2];
+ if (loc1->GetOffset() != loc2->GetOffset()) {
+ // Either two different instance fields, or one is an instance
+ // field and the other is an array element.
+ return false;
+ }
+ if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
+ // Different types.
+ return false;
+ }
+ if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
+ return false;
+ }
+ if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
+ HInstruction* array_index1 = loc1->GetIndex();
+ HInstruction* array_index2 = loc2->GetIndex();
+ DCHECK(array_index1 != nullptr);
+ DCHECK(array_index2 != nullptr);
+ if (array_index1->IsIntConstant() &&
+ array_index2->IsIntConstant() &&
+ array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
+ // Different constant indices do not alias.
+ return false;
+ }
+ ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+ ref_info->SetHasIndexAliasing(true);
+ }
+ return true;
+ }
+
+ ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+ ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
+ if (ref_info == nullptr) {
+ size_t pos = ref_info_array_.size();
+ ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
+ ref_info_array_.push_back(ref_info);
+ }
+ return ref_info;
+ }
+
+ void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+ if (instruction->GetType() != Primitive::kPrimNot) {
+ return;
+ }
+ DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+ GetOrCreateReferenceInfo(instruction);
+ }
+
+ HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
+ size_t offset,
+ HInstruction* index,
+ int16_t declaring_class_def_index) {
+ HInstruction* original_ref = HuntForOriginalReference(ref);
+ ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
+ size_t heap_location_idx = FindHeapLocationIndex(
+ ref_info, offset, index, declaring_class_def_index);
+ if (heap_location_idx == kHeapLocationNotFound) {
+ HeapLocation* heap_loc = new (GetGraph()->GetArena())
+ HeapLocation(ref_info, offset, index, declaring_class_def_index);
+ heap_locations_.push_back(heap_loc);
+ return heap_loc;
+ }
+ return heap_locations_[heap_location_idx];
+ }
+
+ HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+ if (field_info.IsVolatile()) {
+ has_volatile_ = true;
+ }
+ const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
+ const size_t offset = field_info.GetFieldOffset().SizeValue();
+ return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
+ }
+
+ void VisitArrayAccess(HInstruction* array, HInstruction* index) {
+ GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
+ index, HeapLocation::kDeclaringClassDefIndexForArrays);
+ }
+
+ void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+ HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ has_heap_stores_ = true;
+ if (location->GetReferenceInfo()->IsSingleton()) {
+ // A singleton's location value may be killed by loop side effects if it's
+ // defined before that loop, and it's stored into inside that loop.
+ HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+ if (loop_info != nullptr) {
+ HInstruction* ref = location->GetReferenceInfo()->GetReference();
+ DCHECK(ref->IsNewInstance());
+ if (loop_info->IsDefinedOutOfTheLoop(ref)) {
+ // ref's location value may be killed by this loop's side effects.
+ location->SetValueKilledByLoopSideEffects(true);
+ } else {
+ // ref is defined inside this loop so this loop's side effects cannot
+ // kill its location value at the loop header since ref/its location doesn't
+ // exist yet at the loop header.
+ }
+ }
+ } else {
+      // For non-singletons, value_killed_by_loop_side_effects_ is initialized to
+ // true.
+ DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
+ }
+ }
+
+ void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ has_heap_stores_ = true;
+ }
+
+ // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
+ // since we cannot accurately track the fields.
+
+ void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+ VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitArraySet(HArraySet* instruction) OVERRIDE {
+ VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+ has_heap_stores_ = true;
+ }
+
+ void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+ // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
+ CreateReferenceInfoForReferenceType(new_instance);
+ }
+
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitSelect(HSelect* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
+ has_monitor_operations_ = true;
+ }
+
+ ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses.
+ ArenaVector<HeapLocation*> heap_locations_; // All heap locations.
+ ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations.
+ bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better
+ // alias analysis and won't be as effective.
+ bool has_volatile_; // If there are volatile field accesses.
+ bool has_monitor_operations_; // If there are monitor operations.
+
+ DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
+};
+
+class LoadStoreAnalysis : public HOptimization {
+ public:
+ explicit LoadStoreAnalysis(HGraph* graph)
+ : HOptimization(graph, kLoadStoreAnalysisPassName),
+ heap_location_collector_(graph) {}
+
+ const HeapLocationCollector& GetHeapLocationCollector() const {
+ return heap_location_collector_;
+ }
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis";
+
+ private:
+ HeapLocationCollector heap_location_collector_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
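AliasingMatrixPosition() flattens the strict upper triangle of the pair matrix into the bit vector in exactly the order BuildAliasingMatrix() enumerates pairs, which is what CheckedAliasingMatrixPosition() asserts. A small standalone check of that indexing, kept outside the patch (the function name is only for illustration):

#include <cassert>
#include <cstddef>

size_t AliasingMatrixPositionSketch(size_t n, size_t i, size_t j) {
  // Same formula as AliasingMatrixPosition() for a pair (i, j) with i < j.
  return n * i - (1 + i) * i / 2 + (j - i - 1);
}

int main() {
  const size_t n = 5;
  size_t pos = 0;
  for (size_t i = 0; i + 1 < n; ++i) {
    for (size_t j = i + 1; j < n; ++j) {
      assert(AliasingMatrixPositionSketch(n, i, j) == pos);  // Matches the visiting order.
      ++pos;
    }
  }
  return 0;
}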
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
new file mode 100644
index 0000000000..24187777f6
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_analysis.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class LoadStoreAnalysisTest : public CommonCompilerTest {
+ public:
+ LoadStoreAnalysisTest() : pool_(), allocator_(&pool_) {
+ graph_ = CreateGraph(&allocator_);
+ }
+
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ HGraph* graph_;
+};
+
+TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->SetEntryBlock(entry);
+
+ // entry:
+ // array ParameterValue
+ // index ParameterValue
+ // c1 IntConstant
+ // c2 IntConstant
+ // c3 IntConstant
+ // array_get1 ArrayGet [array, c1]
+ // array_get2 ArrayGet [array, c2]
+ // array_set1 ArraySet [array, c1, c3]
+ // array_set2 ArraySet [array, index, c3]
+ HInstruction* array = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+ HInstruction* index = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* c2 = graph_->GetIntConstant(2);
+ HInstruction* c3 = graph_->GetIntConstant(3);
+ HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0);
+ HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0);
+ HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0);
+ HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0);
+ entry->AddInstruction(array);
+ entry->AddInstruction(index);
+ entry->AddInstruction(array_get1);
+ entry->AddInstruction(array_get2);
+ entry->AddInstruction(array_set1);
+ entry->AddInstruction(array_set2);
+
+ // Test HeapLocationCollector initialization.
+ // Should be no heap locations, no operations on the heap.
+ HeapLocationCollector heap_location_collector(graph_);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+ ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+  // Test that, after visiting the graph_, the collector sees the following heap
+  // locations: array[c1], array[c2], array[index]; and that it sees heap stores.
+ heap_location_collector.VisitBasicBlock(entry);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 3U);
+ ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+ // Test queries on HeapLocationCollector's ref info and index records.
+ ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array);
+ size_t field_off = HeapLocation::kInvalidFieldOffset;
+ size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
+ size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def);
+ size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def);
+ size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def);
+ // must find this reference info for array in HeapLocationCollector.
+ ASSERT_TRUE(ref != nullptr);
+ // must find these heap locations;
+  // and array[c1], array[c2], array[index] should be different heap locations.
+ ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc3 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc1 != loc2);
+ ASSERT_TRUE(loc2 != loc3);
+ ASSERT_TRUE(loc1 != loc3);
+
+ // Test alias relationships after building aliasing matrix.
+ // array[1] and array[2] clearly should not alias;
+  // array[index] should alias with the others, because index is an unknown value.
+ heap_location_collector.BuildAliasingMatrix();
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
+  ASSERT_TRUE(heap_location_collector.MayAlias(loc2, loc3));
+}
+
+TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->SetEntryBlock(entry);
+
+ // entry:
+ // object ParameterValue
+ // c1 IntConstant
+ // set_field10 InstanceFieldSet [object, c1, 10]
+ // get_field10 InstanceFieldGet [object, 10]
+ // get_field20 InstanceFieldGet [object, 20]
+
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ Primitive::kPrimNot);
+ HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object,
+ c1,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(10),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+ HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(10),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+ HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(20),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+ entry->AddInstruction(object);
+ entry->AddInstruction(set_field10);
+ entry->AddInstruction(get_field10);
+ entry->AddInstruction(get_field20);
+
+ // Test HeapLocationCollector initialization.
+ // Should be no heap locations, no operations on the heap.
+ HeapLocationCollector heap_location_collector(graph_);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+ ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+  // Test that, after visiting the graph, the collector sees the following heap
+  // locations: object.field10, object.field20; and that it sees heap stores.
+ heap_location_collector.VisitBasicBlock(entry);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 2U);
+ ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+ // Test queries on HeapLocationCollector's ref info and index records.
+ ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object);
+ size_t loc1 = heap_location_collector.FindHeapLocationIndex(
+ ref, 10, nullptr, kUnknownClassDefIndex);
+ size_t loc2 = heap_location_collector.FindHeapLocationIndex(
+ ref, 20, nullptr, kUnknownClassDefIndex);
+  // must find the reference info for object in HeapLocationCollector.
+ ASSERT_TRUE(ref != nullptr);
+ // must find these heap locations.
+ ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+ // different fields of same object.
+ ASSERT_TRUE(loc1 != loc2);
+ // accesses to different fields of the same object should not alias.
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+}
+
+} // namespace art
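For the ArrayHeapLocations test above, three locations give three pair bits. Assuming locations are numbered in visitation order (array[c1] = 0, array[c2] = 1, array[index] = 2), only the two pairs involving array[index] end up set, which is what the MayAlias assertions check. A tiny illustration, with std::bitset standing in for the ArenaBitVector:

#include <bitset>
#include <iostream>

int main() {
  std::bitset<3> aliasing_matrix;        // Bit 0: (array[c1], array[c2]); distinct constants never alias.
  aliasing_matrix.set(1);                // Bit 1: (array[c1], array[index]) may alias.
  aliasing_matrix.set(2);                // Bit 2: (array[c2], array[index]) may alias.
  std::cout << aliasing_matrix << "\n";  // Prints 110 (highest bit first).
  return 0;
}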
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 48699b33ae..211528b4bd 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "load_store_analysis.h"
#include "load_store_elimination.h"
#include "escape.h"
@@ -23,477 +24,6 @@
namespace art {
-class ReferenceInfo;
-
-// A cap for the number of heap locations to prevent pathological time/space consumption.
-// The number of heap locations for most of the methods stays below this threshold.
-constexpr size_t kMaxNumberOfHeapLocations = 32;
-
-// A ReferenceInfo contains additional info about a reference such as
-// whether it's a singleton, returned, etc.
-class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
- public:
- ReferenceInfo(HInstruction* reference, size_t pos)
- : reference_(reference),
- position_(pos),
- is_singleton_(true),
- is_singleton_and_not_returned_(true),
- is_singleton_and_not_deopt_visible_(true),
- has_index_aliasing_(false) {
- CalculateEscape(reference_,
- nullptr,
- &is_singleton_,
- &is_singleton_and_not_returned_,
- &is_singleton_and_not_deopt_visible_);
- }
-
- HInstruction* GetReference() const {
- return reference_;
- }
-
- size_t GetPosition() const {
- return position_;
- }
-
- // Returns true if reference_ is the only name that can refer to its value during
- // the lifetime of the method. So it's guaranteed to not have any alias in
- // the method (including its callees).
- bool IsSingleton() const {
- return is_singleton_;
- }
-
- // Returns true if reference_ is a singleton and not returned to the caller or
- // used as an environment local of an HDeoptimize instruction.
- // The allocation and stores into reference_ may be eliminated for such cases.
- bool IsSingletonAndRemovable() const {
- return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
- }
-
- // Returns true if reference_ is a singleton and returned to the caller or
- // used as an environment local of an HDeoptimize instruction.
- bool IsSingletonAndNonRemovable() const {
- return is_singleton_ &&
- (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
- }
-
- bool HasIndexAliasing() {
- return has_index_aliasing_;
- }
-
- void SetHasIndexAliasing(bool has_index_aliasing) {
- // Only allow setting to true.
- DCHECK(has_index_aliasing);
- has_index_aliasing_ = has_index_aliasing;
- }
-
- private:
- HInstruction* const reference_;
- const size_t position_; // position in HeapLocationCollector's ref_info_array_.
-
- // Can only be referred to by a single name in the method.
- bool is_singleton_;
- // Is singleton and not returned to caller.
- bool is_singleton_and_not_returned_;
- // Is singleton and not used as an environment local of HDeoptimize.
- bool is_singleton_and_not_deopt_visible_;
- // Some heap locations with reference_ have array index aliasing,
- // e.g. arr[i] and arr[j] may be the same location.
- bool has_index_aliasing_;
-
- DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
-};
-
-// A heap location is a reference-offset/index pair that a value can be loaded from
-// or stored to.
-class HeapLocation : public ArenaObject<kArenaAllocMisc> {
- public:
- static constexpr size_t kInvalidFieldOffset = -1;
-
- // TODO: more fine-grained array types.
- static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
-
- HeapLocation(ReferenceInfo* ref_info,
- size_t offset,
- HInstruction* index,
- int16_t declaring_class_def_index)
- : ref_info_(ref_info),
- offset_(offset),
- index_(index),
- declaring_class_def_index_(declaring_class_def_index),
- value_killed_by_loop_side_effects_(true) {
- DCHECK(ref_info != nullptr);
- DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
- (offset != kInvalidFieldOffset && index == nullptr));
- if (ref_info->IsSingleton() && !IsArrayElement()) {
- // Assume this location's value cannot be killed by loop side effects
- // until proven otherwise.
- value_killed_by_loop_side_effects_ = false;
- }
- }
-
- ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
- size_t GetOffset() const { return offset_; }
- HInstruction* GetIndex() const { return index_; }
-
- // Returns the definition of declaring class' dex index.
- // It's kDeclaringClassDefIndexForArrays for an array element.
- int16_t GetDeclaringClassDefIndex() const {
- return declaring_class_def_index_;
- }
-
- bool IsArrayElement() const {
- return index_ != nullptr;
- }
-
- bool IsValueKilledByLoopSideEffects() const {
- return value_killed_by_loop_side_effects_;
- }
-
- void SetValueKilledByLoopSideEffects(bool val) {
- value_killed_by_loop_side_effects_ = val;
- }
-
- private:
- ReferenceInfo* const ref_info_; // reference for instance/static field or array access.
- const size_t offset_; // offset of static/instance field.
- HInstruction* const index_; // index of an array element.
- const int16_t declaring_class_def_index_; // declaring class's def's dex index.
- bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop
- // side effects because this location is stored
- // into inside a loop. This gives
- // better info on whether a singleton's location
- // value may be killed by loop side effects.
-
- DISALLOW_COPY_AND_ASSIGN(HeapLocation);
-};
-
-static HInstruction* HuntForOriginalReference(HInstruction* ref) {
- DCHECK(ref != nullptr);
- while (ref->IsNullCheck() || ref->IsBoundType()) {
- ref = ref->InputAt(0);
- }
- return ref;
-}
-
-// A HeapLocationCollector collects all relevant heap locations and keeps
-// an aliasing matrix for all locations.
-class HeapLocationCollector : public HGraphVisitor {
- public:
- static constexpr size_t kHeapLocationNotFound = -1;
- // Start with a single uint32_t word. That's enough bits for pair-wise
- // aliasing matrix of 8 heap locations.
- static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
-
- explicit HeapLocationCollector(HGraph* graph)
- : HGraphVisitor(graph),
- ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
- heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
- aliasing_matrix_(graph->GetArena(),
- kInitialAliasingMatrixBitVectorSize,
- true,
- kArenaAllocLSE),
- has_heap_stores_(false),
- has_volatile_(false),
- has_monitor_operations_(false) {}
-
- size_t GetNumberOfHeapLocations() const {
- return heap_locations_.size();
- }
-
- HeapLocation* GetHeapLocation(size_t index) const {
- return heap_locations_[index];
- }
-
- ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
- for (size_t i = 0; i < ref_info_array_.size(); i++) {
- ReferenceInfo* ref_info = ref_info_array_[i];
- if (ref_info->GetReference() == ref) {
- DCHECK_EQ(i, ref_info->GetPosition());
- return ref_info;
- }
- }
- return nullptr;
- }
-
- bool HasHeapStores() const {
- return has_heap_stores_;
- }
-
- bool HasVolatile() const {
- return has_volatile_;
- }
-
- bool HasMonitorOps() const {
- return has_monitor_operations_;
- }
-
- // Find and return the heap location index in heap_locations_.
- size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
- size_t offset,
- HInstruction* index,
- int16_t declaring_class_def_index) const {
- for (size_t i = 0; i < heap_locations_.size(); i++) {
- HeapLocation* loc = heap_locations_[i];
- if (loc->GetReferenceInfo() == ref_info &&
- loc->GetOffset() == offset &&
- loc->GetIndex() == index &&
- loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
- return i;
- }
- }
- return kHeapLocationNotFound;
- }
-
- // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
- bool MayAlias(size_t index1, size_t index2) const {
- if (index1 < index2) {
- return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
- } else if (index1 > index2) {
- return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
- } else {
- DCHECK(false) << "index1 and index2 are expected to be different";
- return true;
- }
- }
-
- void BuildAliasingMatrix() {
- const size_t number_of_locations = heap_locations_.size();
- if (number_of_locations == 0) {
- return;
- }
- size_t pos = 0;
- // Compute aliasing info between every pair of different heap locations.
- // Save the result in a matrix represented as a BitVector.
- for (size_t i = 0; i < number_of_locations - 1; i++) {
- for (size_t j = i + 1; j < number_of_locations; j++) {
- if (ComputeMayAlias(i, j)) {
- aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
- }
- pos++;
- }
- }
- }
-
- private:
- // An allocation cannot alias with a name which already exists at the point
- // of the allocation, such as a parameter or a load happening before the allocation.
- bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
- if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
- // Any reference that can alias with the allocation must appear after it in the block/in
- // the block's successors. In reverse post order, those instructions will be visited after
- // the allocation.
- return ref_info2->GetPosition() >= ref_info1->GetPosition();
- }
- return true;
- }
-
- bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
- if (ref_info1 == ref_info2) {
- return true;
- } else if (ref_info1->IsSingleton()) {
- return false;
- } else if (ref_info2->IsSingleton()) {
- return false;
- } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
- !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
- return false;
- }
- return true;
- }
-
- // `index1` and `index2` are indices in the array of collected heap locations.
- // Returns the position in the bit vector that tracks whether the two heap
- // locations may alias.
- size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
- DCHECK(index2 > index1);
- const size_t number_of_locations = heap_locations_.size();
- // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
- return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
- }
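A quick worked check of the triangular indexing above (illustrative, not part of the change): with 4 heap locations, BuildAliasingMatrix visits the pairs in the order (0,1) (0,2) (0,3) (1,2) (1,3) (2,3), i.e. bit positions 0..5, and the formula reproduces them:

    // e.g. (1,3): 4*1 - (1+1)*1/2 + (3-1-1) = 4 - 1 + 1 = 4
    // e.g. (2,3): 4*2 - (1+2)*2/2 + (3-2-1) = 8 - 3 + 0 = 5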
-
- // An additional position is passed in to make sure the calculated position is correct.
- size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
- size_t calculated_position = AliasingMatrixPosition(index1, index2);
- DCHECK_EQ(calculated_position, position);
- return calculated_position;
- }
-
- // Compute if two locations may alias to each other.
- bool ComputeMayAlias(size_t index1, size_t index2) const {
- HeapLocation* loc1 = heap_locations_[index1];
- HeapLocation* loc2 = heap_locations_[index2];
- if (loc1->GetOffset() != loc2->GetOffset()) {
- // Either two different instance fields, or one is an instance
- // field and the other is an array element.
- return false;
- }
- if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
- // Different types.
- return false;
- }
- if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
- return false;
- }
- if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
- HInstruction* array_index1 = loc1->GetIndex();
- HInstruction* array_index2 = loc2->GetIndex();
- DCHECK(array_index1 != nullptr);
- DCHECK(array_index2 != nullptr);
- if (array_index1->IsIntConstant() &&
- array_index2->IsIntConstant() &&
- array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
- // Different constant indices do not alias.
- return false;
- }
- ReferenceInfo* ref_info = loc1->GetReferenceInfo();
- ref_info->SetHasIndexAliasing(true);
- }
- return true;
- }
-
- ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
- ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
- if (ref_info == nullptr) {
- size_t pos = ref_info_array_.size();
- ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
- ref_info_array_.push_back(ref_info);
- }
- return ref_info;
- }
-
- void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
- if (instruction->GetType() != Primitive::kPrimNot) {
- return;
- }
- DCHECK(FindReferenceInfoOf(instruction) == nullptr);
- GetOrCreateReferenceInfo(instruction);
- }
-
- HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
- size_t offset,
- HInstruction* index,
- int16_t declaring_class_def_index) {
- HInstruction* original_ref = HuntForOriginalReference(ref);
- ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
- size_t heap_location_idx = FindHeapLocationIndex(
- ref_info, offset, index, declaring_class_def_index);
- if (heap_location_idx == kHeapLocationNotFound) {
- HeapLocation* heap_loc = new (GetGraph()->GetArena())
- HeapLocation(ref_info, offset, index, declaring_class_def_index);
- heap_locations_.push_back(heap_loc);
- return heap_loc;
- }
- return heap_locations_[heap_location_idx];
- }
-
- HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
- if (field_info.IsVolatile()) {
- has_volatile_ = true;
- }
- const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
- const size_t offset = field_info.GetFieldOffset().SizeValue();
- return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
- }
-
- void VisitArrayAccess(HInstruction* array, HInstruction* index) {
- GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
- index, HeapLocation::kDeclaringClassDefIndexForArrays);
- }
-
- void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
- VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
- HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
- has_heap_stores_ = true;
- if (location->GetReferenceInfo()->IsSingleton()) {
- // A singleton's location value may be killed by loop side effects if it's
- // defined before that loop, and it's stored into inside that loop.
- HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
- if (loop_info != nullptr) {
- HInstruction* ref = location->GetReferenceInfo()->GetReference();
- DCHECK(ref->IsNewInstance());
- if (loop_info->IsDefinedOutOfTheLoop(ref)) {
- // ref's location value may be killed by this loop's side effects.
- location->SetValueKilledByLoopSideEffects(true);
- } else {
- // ref is defined inside this loop so this loop's side effects cannot
- // kill its location value at the loop header since ref/its location doesn't
- // exist yet at the loop header.
- }
- }
- } else {
- // For non-singletons, value_killed_by_loop_side_effects_ is initialized
- // to true.
- DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
- }
- }
-
- void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
- VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
- VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
- has_heap_stores_ = true;
- }
-
- // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
- // since we cannot accurately track the fields.
-
- void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
- VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitArraySet(HArraySet* instruction) OVERRIDE {
- VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
- has_heap_stores_ = true;
- }
-
- void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
- // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
- CreateReferenceInfoForReferenceType(new_instance);
- }
-
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitSelect(HSelect* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
- has_monitor_operations_ = true;
- }
-
- ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses.
- ArenaVector<HeapLocation*> heap_locations_; // All heap locations.
- ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations.
- bool has_heap_stores_; // If there are no heap stores, LSE acts as GVN with better
- // alias analysis and won't be as effective.
- bool has_volatile_; // If there are volatile field accesses.
- bool has_monitor_operations_; // If there are monitor operations.
-
- DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
-};
-
// An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
// A heap location can be set to kUnknownHeapValue when:
// - it is initially assigned a value.
@@ -516,7 +46,7 @@ class LSEVisitor : public HGraphVisitor {
side_effects_(side_effects),
heap_values_for_(graph->GetBlocks().size(),
ArenaVector<HInstruction*>(heap_locations_collector.
- GetNumberOfHeapLocations(),
+ GetNumberOfHeapLocations(),
kUnknownHeapValue,
graph->GetArena()->Adapter(kArenaAllocLSE)),
graph->GetArena()->Adapter(kArenaAllocLSE)),
@@ -566,14 +96,20 @@ class LSEVisitor : public HGraphVisitor {
store->GetBlock()->RemoveInstruction(store);
}
- // Eliminate allocations that are not used.
+ // Eliminate singleton-classified instructions:
+ //  * Constructor fences (they never escape this thread).
+ //  * Allocations (if they are unused).
for (HInstruction* new_instance : singleton_new_instances_) {
+ HConstructorFence::RemoveConstructorFences(new_instance);
+
if (!new_instance->HasNonEnvironmentUses()) {
new_instance->RemoveEnvironmentUsers();
new_instance->GetBlock()->RemoveInstruction(new_instance);
}
}
for (HInstruction* new_array : singleton_new_arrays_) {
+ HConstructorFence::RemoveConstructorFences(new_array);
+
if (!new_array->HasNonEnvironmentUses()) {
new_array->RemoveEnvironmentUsers();
new_array->GetBlock()->RemoveInstruction(new_array);
@@ -754,7 +290,7 @@ class LSEVisitor : public HGraphVisitor {
size_t offset,
HInstruction* index,
int16_t declaring_class_def_index) {
- HInstruction* original_ref = HuntForOriginalReference(ref);
+ HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
size_t idx = heap_location_collector_.FindHeapLocationIndex(
ref_info, offset, index, declaring_class_def_index);
@@ -821,7 +357,7 @@ class LSEVisitor : public HGraphVisitor {
HInstruction* index,
int16_t declaring_class_def_index,
HInstruction* value) {
- HInstruction* original_ref = HuntForOriginalReference(ref);
+ HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
size_t idx = heap_location_collector_.FindHeapLocationIndex(
ref_info, offset, index, declaring_class_def_index);
@@ -1121,25 +657,12 @@ void LoadStoreElimination::Run() {
// Skip this optimization.
return;
}
- HeapLocationCollector heap_location_collector(graph_);
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- heap_location_collector.VisitBasicBlock(block);
- }
- if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
- // Bail out if there are too many heap locations to deal with.
- return;
- }
- if (!heap_location_collector.HasHeapStores()) {
- // Without heap stores, this pass would act mostly as GVN on heap accesses.
+ const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector();
+ if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
+ // No HeapLocation information from LSA, skip this optimization.
return;
}
- if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) {
- // Don't do load/store elimination if the method has volatile field accesses or
- // monitor operations, for now.
- // TODO: do it right.
- return;
- }
- heap_location_collector.BuildAliasingMatrix();
+
LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
lse_visitor.VisitBasicBlock(block);
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 1d9e5c8da6..efe71c733a 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -22,12 +22,16 @@
namespace art {
class SideEffectsAnalysis;
+class LoadStoreAnalysis;
class LoadStoreElimination : public HOptimization {
public:
- LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects)
+ LoadStoreElimination(HGraph* graph,
+ const SideEffectsAnalysis& side_effects,
+ const LoadStoreAnalysis& lsa)
: HOptimization(graph, kLoadStoreEliminationPassName),
- side_effects_(side_effects) {}
+ side_effects_(side_effects),
+ lsa_(lsa) {}
void Run() OVERRIDE;
@@ -35,6 +39,7 @@ class LoadStoreElimination : public HOptimization {
private:
const SideEffectsAnalysis& side_effects_;
+ const LoadStoreAnalysis& lsa_;
DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination);
};
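Since LoadStoreElimination now consumes the HeapLocationCollector produced by LoadStoreAnalysis, the two passes have to be chained accordingly. A minimal wiring sketch (the SideEffectsAnalysis/LoadStoreAnalysis constructor arguments and the `arena`/`graph` names are assumptions; only the LoadStoreElimination signature comes from this change):

    SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
    LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
    LoadStoreElimination* lse =
        new (arena) LoadStoreElimination(graph, *side_effects, *lsa);
    side_effects->Run();
    lsa->Run();  // Builds the HeapLocationCollector that LSE queries via GetHeapLocationCollector().
    lse->Run();  // Bails out early when LSA reported zero heap locations.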
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index d5e105951b..4334ab10bd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -71,7 +71,7 @@ static bool IsSignExtensionAndGet(HInstruction* instruction,
// extension when represented in the *width* of the given narrower data type
// (the fact that char normally zero extends does not matter here).
int64_t value = 0;
- if (IsInt64AndGet(instruction, &value)) {
+ if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
if (std::numeric_limits<int8_t>::min() <= value &&
@@ -119,7 +119,7 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction,
// extension when represented in the *width* of the given narrower data type
// (the fact that byte/short normally sign extend does not matter here).
int64_t value = 0;
- if (IsInt64AndGet(instruction, &value)) {
+ if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
if (std::numeric_limits<uint8_t>::min() <= value &&
@@ -173,6 +173,84 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction,
return false;
}
+// Detect situations with same-extension narrower operands.
+// Returns true on success and sets is_unsigned accordingly.
+static bool IsNarrowerOperands(HInstruction* a,
+ HInstruction* b,
+ Primitive::Type type,
+ /*out*/ HInstruction** r,
+ /*out*/ HInstruction** s,
+ /*out*/ bool* is_unsigned) {
+ if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) {
+ *is_unsigned = false;
+ return true;
+ } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) {
+ *is_unsigned = true;
+ return true;
+ }
+ return false;
+}
+
+// As above, single operand.
+static bool IsNarrowerOperand(HInstruction* a,
+ Primitive::Type type,
+ /*out*/ HInstruction** r,
+ /*out*/ bool* is_unsigned) {
+ if (IsSignExtensionAndGet(a, type, r)) {
+ *is_unsigned = false;
+ return true;
+ } else if (IsZeroExtensionAndGet(a, type, r)) {
+ *is_unsigned = true;
+ return true;
+ }
+ return false;
+}
+
+// Detect up to two instructions a and b, and an accumulated constant c.
+static bool IsAddConstHelper(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c,
+ int32_t depth) {
+ static constexpr int32_t kMaxDepth = 8; // don't search too deep
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ *c += value;
+ return true;
+ } else if (instruction->IsAdd() && depth <= kMaxDepth) {
+ return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
+ IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
+ } else if (*a == nullptr) {
+ *a = instruction;
+ return true;
+ } else if (*b == nullptr) {
+ *b = instruction;
+ return true;
+ }
+ return false; // too many non-const operands
+}
+
+// Detect a + b + c for an optional constant c.
+static bool IsAddConst(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c) {
+ if (instruction->IsAdd()) {
+ // Try to find a + b and accumulated c.
+ if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
+ IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
+ *b != nullptr) {
+ return true;
+ }
+ // Found a + b.
+ *a = instruction->InputAt(0);
+ *b = instruction->InputAt(1);
+ *c = 0;
+ return true;
+ }
+ return false;
+}
+
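To make the matcher above concrete, these are the shapes it is intended to accept (illustrative; operand names are arbitrary):

    //   a + b          ->  *a = a, *b = b, *c = 0
    //   (a + 1) + b    ->  *a = a, *b = b, *c = 1   (constants are folded into c)
    //   (a + b) + 2    ->  *a = a, *b = b, *c = 2
    //   a + 1          ->  *a = a, *b = <constant 1>, *c = 0   (fallback: only one non-constant
    //                      operand was found, so the plain a + b form is reported instead)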
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -733,7 +811,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}
-// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+// TODO: saturation arithmetic.
bool HLoopOptimization::VectorizeUse(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -755,10 +833,9 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
return true;
} else if (instruction->IsArrayGet()) {
- // Strings are different, with a different offset to the actual data
- // and some compressed to save memory. For now, all cases are rejected
- // to avoid the complexity.
- if (instruction->AsArrayGet()->IsStringCharAt()) {
+ // Deal with vector restrictions.
+ if (instruction->AsArrayGet()->IsStringCharAt() &&
+ HasVectorRestrictions(restrictions, kNoStringCharAt)) {
return false;
}
// Accept a right-hand-side array base[index] for
@@ -850,30 +927,38 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
// Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
if ((HasVectorRestrictions(restrictions, kNoShift)) ||
(instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
return false; // unsupported instruction
- } else if ((instruction->IsShr() || instruction->IsUShr()) &&
- HasVectorRestrictions(restrictions, kNoHiBits)) {
- return false; // hibits may impact lobits; TODO: we can do better!
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
+ // Shifts right need extra care to account for higher order bits.
+ // TODO: less likely, but shr/unsigned and ushr/signed could be handled by flipping the signedness.
+ if (instruction->IsShr() &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless all operands are sign-extension narrower
+ } else if (instruction->IsUShr() &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
+ return false; // reject, unless all operands are zero-extension narrower
+ }
}
// Accept shift operator for vectorizable/invariant operands.
// TODO: accept symbolic, albeit loop invariant shift factors.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
- int64_t value = 0;
- if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) {
- // Make sure shift distance only looks at lower bits, as defined for sequential shifts.
- int64_t mask = (instruction->GetType() == Primitive::kPrimLong)
- ? kMaxLongShiftDistance
- : kMaxIntShiftDistance;
- int64_t distance = value & mask;
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
+ int64_t distance = 0;
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ IsInt64AndGet(opb, /*out*/ &distance)) {
// Restrict shift distance to packed data type width.
int64_t max_distance = Primitive::ComponentSize(type) * 8;
if (0 <= distance && distance < max_distance) {
if (generate_code) {
- HInstruction* s = graph_->GetIntConstant(distance);
- GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+ GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
}
return true;
}
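A worked example of why the kNoHiBits checks above require matching extensions (byte lanes, shift distance 3; illustrative):

    //   x = sign-extend((byte) 0x80) = 0xFFFFFF80 (-128)
    //     scalar int:       x >> 3 = 0xFFFFFFF0, low byte 0xF0 (-16)
    //     packed signed i8: 0x80 >> 3 = 0xF0 (-16)        -> lanes agree, shr accepted
    //   x = zero-extend((byte) 0x80) = 0x00000080 (128)
    //     scalar int:       x >> 3 = 0x00000010, low byte 0x10 (16)
    //     packed signed i8: 0x80 >> 3 = 0xF0 (-16)        -> mismatch, so shr with a
    //                                                        zero-extended operand is rejected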
@@ -887,16 +972,59 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
case Intrinsics::kMathAbsFloat:
case Intrinsics::kMathAbsDouble: {
// Deal with vector restrictions.
- if (HasVectorRestrictions(restrictions, kNoAbs) ||
- HasVectorRestrictions(restrictions, kNoHiBits)) {
- // TODO: we can do better for some hibits cases.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoAbs)) {
return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless operand is sign-extension narrower
}
// Accept ABS(x) for vectorizable operand.
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
+ }
+ return true;
+ }
+ return false;
+ }
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble:
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble: {
+ // Deal with vector restrictions.
HInstruction* opa = instruction->InputAt(0);
- if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ HInstruction* s = opb;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+ return false; // reject, unless all operands are same-extension narrower
+ }
+ // Accept MIN/MAX(x, y) for vectorizable operands.
+ DCHECK(r != nullptr && s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ s = opb;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
if (generate_code) {
- GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ GenerateVecOp(
+ instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
}
return true;
}
@@ -921,17 +1049,17 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoMul;
+ *restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
return TrySetVectorLength(4);
@@ -957,11 +1085,13 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
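The kNoMinMax restriction for float/double comes from the -0.0 and NaN corner cases; an illustration (the packed-instruction behaviour cited is an assumption about what the x86 backend would emit, e.g. MINPS):

    //   Java:       Math.min(-0.0f, +0.0f) == -0.0f,  and Math.min(x, NaN) == NaN
    //   packed min: minps(-0.0f, +0.0f)    == +0.0f   (the second source operand is returned
    //               for equal/zero and NaN inputs), so results can differ from Math.min/max.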
@@ -969,9 +1099,36 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
}
return false;
case kMips:
- case kMips64:
// TODO: implement MIPS SIMD.
return false;
+ case kMips64:
+ if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ *restrictions |= kNoDiv | kNoMinMax;
+ return TrySetVectorLength(16);
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ *restrictions |= kNoDiv | kNoMinMax | kNoStringCharAt;
+ return TrySetVectorLength(8);
+ case Primitive::kPrimInt:
+ *restrictions |= kNoDiv | kNoMinMax;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimLong:
+ *restrictions |= kNoDiv | kNoMinMax;
+ return TrySetVectorLength(2);
+ case Primitive::kPrimFloat:
+ *restrictions |= kNoMinMax;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimDouble:
+ *restrictions |= kNoMinMax;
+ return TrySetVectorLength(2);
+ default:
+ break;
+ } // switch type
+ }
+ return false;
default:
return false;
} // switch instruction set
@@ -1058,13 +1215,14 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
void HLoopOptimization::GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
- Primitive::Type type) {
+ Primitive::Type type,
+ bool is_unsigned) {
if (vector_mode_ == kSequential) {
- // Scalar code follows implicit integral promotion.
- if (type == Primitive::kPrimBoolean ||
- type == Primitive::kPrimByte ||
- type == Primitive::kPrimChar ||
- type == Primitive::kPrimShort) {
+ // Non-converting scalar code follows implicit integral promotion.
+ if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
+ type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort)) {
type = Primitive::kPrimInt;
}
}
@@ -1141,6 +1299,22 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
DCHECK(opb == nullptr);
vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
break;
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble: {
+ vector = new (global_allocator_)
+ HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+ break;
+ }
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble: {
+ vector = new (global_allocator_)
+ HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+ break;
+ }
default:
LOG(FATAL) << "Unsupported SIMD intrinsic";
UNREACHABLE();
@@ -1150,9 +1324,10 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
// corresponding new scalar instructions in the loop. The instruction will get an
// environment while being inserted from the instruction map in original program order.
DCHECK(vector_mode_ == kSequential);
+ size_t num_args = invoke->GetNumberOfArguments();
HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
global_allocator_,
- invoke->GetNumberOfArguments(),
+ num_args,
invoke->GetType(),
invoke->GetDexPc(),
invoke->GetDexMethodIndex(),
@@ -1162,8 +1337,14 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
invoke->GetTargetMethod(),
invoke->GetClinitCheckRequirement());
HInputsRef inputs = invoke->GetInputs();
- for (size_t index = 0; index < inputs.size(); ++index) {
- new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index]));
+ size_t num_inputs = inputs.size();
+ DCHECK_LE(num_args, num_inputs);
+ DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree
+ for (size_t index = 0; index < num_inputs; ++index) {
+ HInstruction* new_input = index < num_args
+ ? vector_map_->Get(inputs[index])
+ : inputs[index]; // beyond arguments: just pass through
+ new_invoke->SetArgumentAt(index, new_input);
}
new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
kNeedsEnvironmentOrCache,
@@ -1200,34 +1381,30 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
Primitive::Type type,
uint64_t restrictions) {
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
- // (note whether the sign bit in higher precision is shifted in has no effect
+ // (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
- int64_t value = 0;
+ int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
- //
- // TODO: make following code less sensitive to associativity and commutativity differences.
- //
- HInstruction* x = instruction->InputAt(0);
- // Test for an optional rounding part (x + 1) >> 1.
- bool is_rounded = false;
- if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
- x = x->InputAt(0);
- is_rounded = true;
- }
- // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
- if (x->IsAdd()) {
- HInstruction* a = x->InputAt(0);
- HInstruction* b = x->InputAt(1);
+ IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
+ // Test for (a + b + c) >> 1 for optional constant c.
+ HInstruction* a = nullptr;
+ HInstruction* b = nullptr;
+ int64_t c = 0;
+ if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+ DCHECK(a != nullptr && b != nullptr);
+ // Accept c == 1 (rounded) or c == 0 (not rounded).
+ bool is_rounded = false;
+ if (c == 1) {
+ is_rounded = true;
+ } else if (c != 0) {
+ return false;
+ }
+ // Accept consistent zero or sign extension on operands a and b.
HInstruction* r = nullptr;
HInstruction* s = nullptr;
bool is_unsigned = false;
- if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
- is_unsigned = true;
- } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
- is_unsigned = false;
- } else {
+ if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) {
return false;
}
// Deal with vector restrictions.
@@ -1238,6 +1415,10 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
DCHECK(r != nullptr && s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = instruction->InputAt(0);
+ s = instruction->InputAt(1);
+ }
if (VectorizeUse(node, r, generate_code, type, restrictions) &&
VectorizeUse(node, s, generate_code, type, restrictions)) {
if (generate_code) {
@@ -1251,12 +1432,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
is_unsigned,
is_rounded));
} else {
- VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
- VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
- GenerateVecOp(instruction,
- vector_map_->Get(instruction->InputAt(0)),
- vector_map_->Get(instruction->InputAt(1)),
- type);
+ GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
}
}
return true;
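A Java-level sketch of the source pattern this idiom matcher targets (illustrative; shown in its signed, rounded form — with zero-extending masks such as `& 0xff`, or char data, the unsigned variant would be chosen instead):

    //   byte[] a, b, out;  // equal lengths
    //   for (int i = 0; i < out.length; i++) {
    //     out[i] = (byte) ((a[i] + b[i] + 1) >> 1);  // rounded halving add
    //   }
    // Here a[i]/b[i] are the sign-extension-narrower operands found by IsNarrowerOperands,
    // the "+ 1" is the accumulated constant c == 1 recognized by IsAddConst (is_rounded),
    // and the top-level ">> 1" matches the shift test above, so the loop body can be
    // rewritten as an HVecHalvingAdd when vectorizing.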
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index c3b0b5d996..cc6343aeb5 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -71,6 +71,8 @@ class HLoopOptimization : public HOptimization {
kNoSignedHAdd = 32, // no signed halving add
kNoUnroundedHAdd = 64, // no unrounded halving add
kNoAbs = 128, // no absolute value
+ kNoMinMax = 256, // no min/max
+ kNoStringCharAt = 512, // no StringCharAt
};
/*
@@ -136,7 +138,11 @@ class HLoopOptimization : public HOptimization {
HInstruction* opa,
HInstruction* opb,
Primitive::Type type);
- void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+ void GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type,
+ bool is_unsigned = false);
// Vectorization idioms.
bool VectorizeHalvingAddIdiom(LoopNode* node,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index cd05a884e9..9a91287670 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -528,6 +528,15 @@ HCurrentMethod* HGraph::GetCurrentMethod() {
return cached_current_method_;
}
+const char* HGraph::GetMethodName() const {
+ const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
+ return dex_file_.GetMethodName(method_id);
+}
+
+std::string HGraph::PrettyMethod(bool with_signature) const {
+ return dex_file_.PrettyMethod(method_idx_, with_signature);
+}
+
HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) {
switch (type) {
case Primitive::Type::kPrimBoolean:
@@ -1150,6 +1159,95 @@ void HVariableInputSizeInstruction::RemoveInputAt(size_t index) {
}
}
+void HVariableInputSizeInstruction::RemoveAllInputs() {
+ RemoveAsUserOfAllInputs();
+ DCHECK(!HasNonEnvironmentUses());
+
+ inputs_.clear();
+ DCHECK_EQ(0u, InputCount());
+}
+
+void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+ DCHECK(instruction->GetBlock() != nullptr);
+ // Removing constructor fences only makes sense for instructions with an object return type.
+ DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
+
+ // Efficient implementation that simultaneously (in one pass):
+ // * Scans the uses list for all constructor fences.
+ // * Deletes that constructor fence from the uses list of `instruction`.
+ // * Deletes `instruction` from the constructor fence's inputs.
+ // * Deletes the constructor fence if it now has 0 inputs.
+
+ const HUseList<HInstruction*>& uses = instruction->GetUses();
+ // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt.
+ for (auto it = uses.begin(), end = uses.end(); it != end; ) {
+ const HUseListNode<HInstruction*>& use_node = *it;
+ HInstruction* const use_instruction = use_node.GetUser();
+
+ // Advance the iterator immediately once we fetch the use_node.
+ // Warning: If the input is removed, the current iterator becomes invalid.
+ ++it;
+
+ if (use_instruction->IsConstructorFence()) {
+ HConstructorFence* ctor_fence = use_instruction->AsConstructorFence();
+ size_t input_index = use_node.GetIndex();
+
+ // Process the candidate instruction for removal
+ // from the graph.
+
+ // Constructor fence instructions are never
+ // used by other instructions.
+ //
+ // If we wanted to make this more generic, it
+ // could be a runtime if statement.
+ DCHECK(!ctor_fence->HasUses());
+
+ // A constructor fence's return type is "kPrimVoid"
+ // and therefore it can't have any environment uses.
+ DCHECK(!ctor_fence->HasEnvironmentUses());
+
+ // Remove the inputs first, otherwise removing the instruction
+ // will try to remove its uses while we are already removing uses
+ // and this operation will fail.
+ DCHECK_EQ(instruction, ctor_fence->InputAt(input_index));
+
+ // Removing the input will also remove the `use_node`.
+ // (Do not look at `use_node` after this, it will be a dangling reference).
+ ctor_fence->RemoveInputAt(input_index);
+
+ // Once all inputs are removed, the fence is considered dead and
+ // is removed.
+ if (ctor_fence->InputCount() == 0u) {
+ ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+ }
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // Post-condition checks:
+ // * None of the uses of `instruction` are a constructor fence.
+ // * The `instruction` itself did not get removed from a block.
+ for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) {
+ CHECK(!use_node.GetUser()->IsConstructorFence());
+ }
+ CHECK(instruction->GetBlock() != nullptr);
+ }
+}
+
+HInstruction* HConstructorFence::GetAssociatedAllocation() {
+ HInstruction* new_instance_inst = GetPrevious();
+ // Check if the immediately preceding instruction is a new-instance/new-array.
+ // Otherwise this fence is for protecting final fields.
+ if (new_instance_inst != nullptr &&
+ (new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) {
+ // TODO: Need to update this code to handle multiple inputs.
+ DCHECK_EQ(InputCount(), 1u);
+ return new_instance_inst;
+ } else {
+ return nullptr;
+ }
+}
+
#define DEFINE_ACCEPT(name, super) \
void H##name::Accept(HGraphVisitor* visitor) { \
visitor->Visit##name(this); \
@@ -2538,15 +2636,17 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const {
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
switch (rhs) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
- return os << "string_init";
+ return os << "StringInit";
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- return os << "recursive";
+ return os << "Recursive";
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+ return os << "BootImageLinkTimePcRelative";
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- return os << "direct";
+ return os << "DirectAddress";
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
- return os << "dex_cache_pc_relative";
+ return os << "DexCachePcRelative";
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
- return os << "dex_cache_via_method";
+ return os << "DexCacheViaMethod";
default:
LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
UNREACHABLE();
@@ -2590,7 +2690,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
void HLoadClass::SetLoadKind(LoadKind load_kind) {
SetPackedField<LoadKindField>(load_kind);
- if (load_kind != LoadKind::kDexCacheViaMethod &&
+ if (load_kind != LoadKind::kRuntimeCall &&
load_kind != LoadKind::kReferrersClass) {
RemoveAsUserOfInput(0u);
SetRawInputAt(0u, nullptr);
@@ -2606,8 +2706,6 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
switch (rhs) {
case HLoadClass::LoadKind::kReferrersClass:
return os << "ReferrersClass";
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- return os << "BootImageLinkTimeAddress";
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
return os << "BootImageLinkTimePcRelative";
case HLoadClass::LoadKind::kBootImageAddress:
@@ -2616,8 +2714,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
return os << "BssEntry";
case HLoadClass::LoadKind::kJitTableAddress:
return os << "JitTableAddress";
- case HLoadClass::LoadKind::kDexCacheViaMethod:
- return os << "DexCacheViaMethod";
+ case HLoadClass::LoadKind::kRuntimeCall:
+ return os << "RuntimeCall";
default:
LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs);
UNREACHABLE();
@@ -2645,10 +2743,10 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
void HLoadString::SetLoadKind(LoadKind load_kind) {
// Once sharpened, the load kind should not be changed again.
- DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+ DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
SetPackedField<LoadKindField>(load_kind);
- if (load_kind != LoadKind::kDexCacheViaMethod) {
+ if (load_kind != LoadKind::kRuntimeCall) {
RemoveAsUserOfInput(0u);
SetRawInputAt(0u, nullptr);
}
@@ -2660,8 +2758,6 @@ void HLoadString::SetLoadKind(LoadKind load_kind) {
std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
switch (rhs) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- return os << "BootImageLinkTimeAddress";
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
return os << "BootImageLinkTimePcRelative";
case HLoadString::LoadKind::kBootImageAddress:
@@ -2670,8 +2766,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
return os << "BssEntry";
case HLoadString::LoadKind::kJitTableAddress:
return os << "JitTableAddress";
- case HLoadString::LoadKind::kDexCacheViaMethod:
- return os << "DexCacheViaMethod";
+ case HLoadString::LoadKind::kRuntimeCall:
+ return os << "RuntimeCall";
default:
LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 872b9083fe..befd0ff97b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -47,6 +47,7 @@ namespace art {
class GraphChecker;
class HBasicBlock;
+class HConstructorFence;
class HCurrentMethod;
class HDoubleConstant;
class HEnvironment;
@@ -58,6 +59,7 @@ class HIntConstant;
class HInvoke;
class HLongConstant;
class HNullConstant;
+class HParameterValue;
class HPhi;
class HSuspendCheck;
class HTryBoundary;
@@ -538,6 +540,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return method_idx_;
}
+ // Get the method name (without the signature), e.g. "<init>"
+ const char* GetMethodName() const;
+
+ // Get the pretty method name (class + name + optionally signature).
+ std::string PrettyMethod(bool with_signature = true) const;
+
InvokeType GetInvokeType() const {
return invoke_type_;
}
@@ -1298,6 +1306,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ClearException, Instruction) \
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
+ M(ConstructorFence, Instruction) \
M(CurrentMethod, Instruction) \
M(ShouldDeoptimizeFlag, Instruction) \
M(Deoptimize, Instruction) \
@@ -1397,7 +1406,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(BitwiseNegatedRight, Instruction) \
M(DataProcWithShifterOp, Instruction) \
M(MultiplyAccumulate, Instruction) \
- M(IntermediateAddress, Instruction)
+ M(IntermediateAddress, Instruction) \
+ M(IntermediateAddressIndex, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
@@ -1477,8 +1487,11 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
template <typename T>
class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
public:
+ // Get the instruction which has this use as one of the inputs.
T GetUser() const { return user_; }
+ // Get the position of the input record that this use corresponds to.
size_t GetIndex() const { return index_; }
+ // Set the position of the input record that this use corresponds to.
void SetIndex(size_t index) { index_ = index; }
// Hook for the IntrusiveForwardList<>.
@@ -1777,7 +1790,7 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
uint32_t dex_pc,
HInstruction* holder)
: vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)),
- locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)),
+ locations_(arena->Adapter(kArenaAllocEnvironmentLocations)),
parent_(nullptr),
method_(method),
dex_pc_(dex_pc),
@@ -1791,6 +1804,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
to_copy.GetDexPc(),
holder) {}
+ void AllocateLocations() {
+ DCHECK(locations_.empty());
+ locations_.resize(vregs_.size());
+ }
+
void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) {
if (parent_ != nullptr) {
parent_->SetAndCopyParentChain(allocator, parent);
@@ -2038,7 +2056,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
!IsNativeDebugInfo() &&
!IsParameterValue() &&
// If we added an explicit barrier then we should keep it.
- !IsMemoryBarrier();
+ !IsMemoryBarrier() &&
+ !IsConstructorFence();
}
bool IsDeadAndRemovable() const {
@@ -2432,6 +2451,11 @@ class HVariableInputSizeInstruction : public HInstruction {
void InsertInputAt(size_t index, HInstruction* input);
void RemoveInputAt(size_t index);
+ // Removes all the inputs.
+ // Also removes this instructions from each input's use list
+ // (for non-environment uses only).
+ void RemoveAllInputs();
+
protected:
HVariableInputSizeInstruction(SideEffects side_effects,
uint32_t dex_pc,
@@ -4134,6 +4158,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
// Use the method's own ArtMethod* loaded by the register allocator.
kRecursive,
+ // Use PC-relative boot image ArtMethod* address that will be known at link time.
+ // Used for boot image methods referenced by boot image code.
+ kBootImageLinkTimePcRelative,
+
// Use ArtMethod* at a known address, embed the direct address in the code.
// Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
kDirectAddress,
@@ -4273,6 +4301,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
bool HasPcRelativeDexCache() const {
return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
}
+ bool HasPcRelativeMethodLoadKind() const {
+ return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
+ GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
+ }
bool HasCurrentMethodInput() const {
// This function can be called only after the invoke has been fully initialized by the builder.
if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
@@ -5063,7 +5095,7 @@ class HParameterValue FINAL : public HExpression<0> {
const DexFile& GetDexFile() const { return dex_file_; }
dex::TypeIndex GetTypeIndex() const { return type_index_; }
uint8_t GetIndex() const { return index_; }
- bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); }
+ bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
@@ -5371,10 +5403,16 @@ class HArrayGet FINAL : public HExpression<2> {
}
bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
// TODO: We can be smarter here.
- // Currently, the array access is always preceded by an ArrayLength or a NullCheck
- // which generates the implicit null check. There are cases when these can be removed
- // to produce better code. If we ever add optimizations to do so we should allow an
- // implicit check here (as long as the address falls in the first page).
+ // Currently, unless the array is the result of NewArray, the array access is always
+ // preceded by some form of null check necessary for the bounds check, usually an
+ // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for
+ // dynamic BCE. There are cases when these could be removed to produce better code.
+ // If we ever add optimizations to do so we should allow an implicit check here
+ // (as long as the address falls in the first page).
+ //
+ // As an example of such fancy optimization, we could eliminate BoundsCheck for
+ // a = cond ? new int[1] : null;
+ // a[0]; // The Phi does not need bounds check for either input.
return false;
}
@@ -5639,12 +5677,8 @@ class HLoadClass FINAL : public HInstruction {
// Use the Class* from the method's own ArtMethod*.
kReferrersClass,
- // Use boot image Class* address that will be known at link time.
- // Used for boot image classes referenced by boot image code in non-PIC mode.
- kBootImageLinkTimeAddress,
-
// Use PC-relative boot image Class* address that will be known at link time.
- // Used for boot image classes referenced by boot image code in PIC mode.
+ // Used for boot image classes referenced by boot image code.
kBootImageLinkTimePcRelative,
// Use a known boot image Class* address, embedded in the code by the codegen.
@@ -5658,12 +5692,11 @@ class HLoadClass FINAL : public HInstruction {
// Load from the root table associated with the JIT compiled method.
kJitTableAddress,
- // Load from resolved types array accessed through the class loaded from
- // the compiled method's own ArtMethod*. This is the default access type when
- // all other types are unavailable.
- kDexCacheViaMethod,
+ // Load using a simple runtime call. This is the fall-back load kind when
+ // the codegen is unable to use another appropriate kind.
+ kRuntimeCall,
- kLast = kDexCacheViaMethod
+ kLast = kRuntimeCall
};
HLoadClass(HCurrentMethod* current_method,
@@ -5684,7 +5717,7 @@ class HLoadClass FINAL : public HInstruction {
DCHECK(!is_referrers_class || !needs_access_check);
SetPackedField<LoadKindField>(
- is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
+ is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kRuntimeCall);
SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
SetPackedFlag<kFlagIsInBootImage>(false);
SetPackedFlag<kFlagGenerateClInitCheck>(false);
@@ -5718,7 +5751,7 @@ class HLoadClass FINAL : public HInstruction {
bool CanCallRuntime() const {
return NeedsAccessCheck() ||
MustGenerateClinitCheck() ||
- GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+ GetLoadKind() == LoadKind::kRuntimeCall ||
GetLoadKind() == LoadKind::kBssEntry;
}
@@ -5728,7 +5761,7 @@ class HLoadClass FINAL : public HInstruction {
// If the class is in the boot image, the lookup in the runtime call cannot throw.
// This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and
// PIC and subsequently avoids a DCE behavior dependency on the PIC option.
- ((GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+ ((GetLoadKind() == LoadKind::kRuntimeCall ||
GetLoadKind() == LoadKind::kBssEntry) &&
!IsInBootImage());
}
@@ -5747,7 +5780,7 @@ class HLoadClass FINAL : public HInstruction {
const DexFile& GetDexFile() const { return dex_file_; }
bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
- return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+ return GetLoadKind() == LoadKind::kRuntimeCall;
}
static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -5796,15 +5829,14 @@ class HLoadClass FINAL : public HInstruction {
static bool HasTypeReference(LoadKind load_kind) {
return load_kind == LoadKind::kReferrersClass ||
- load_kind == LoadKind::kBootImageLinkTimeAddress ||
load_kind == LoadKind::kBootImageLinkTimePcRelative ||
load_kind == LoadKind::kBssEntry ||
- load_kind == LoadKind::kDexCacheViaMethod;
+ load_kind == LoadKind::kRuntimeCall;
}
void SetLoadKindInternal(LoadKind load_kind);
- // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
+ // The special input is the HCurrentMethod for kRuntimeCall or kReferrersClass.
// For other load kinds it's empty or possibly some architecture-specific instruction
// for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
HUserRecord<HInstruction*> special_input_;
@@ -5813,7 +5845,7 @@ class HLoadClass FINAL : public HInstruction {
// - The compiling method's dex file if the class is defined there too.
// - The compiling method's dex file if the class is referenced there.
// - The dex file where the class is defined. When the load kind can only be
- // kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`.
+ // kBssEntry or kRuntimeCall, we cannot emit code for this `HLoadClass`.
const dex::TypeIndex type_index_;
const DexFile& dex_file_;
@@ -5830,7 +5862,6 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
// The special input is used for PC-relative loads on some architectures,
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
GetLoadKind() == LoadKind::kBootImageAddress ||
GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
DCHECK(special_input_.GetInstruction() == nullptr);
@@ -5842,12 +5873,8 @@ class HLoadString FINAL : public HInstruction {
public:
// Determines how to load the String.
enum class LoadKind {
- // Use boot image String* address that will be known at link time.
- // Used for boot image strings referenced by boot image code in non-PIC mode.
- kBootImageLinkTimeAddress,
-
// Use PC-relative boot image String* address that will be known at link time.
- // Used for boot image strings referenced by boot image code in PIC mode.
+ // Used for boot image strings referenced by boot image code.
kBootImageLinkTimePcRelative,
// Use a known boot image String* address, embedded in the code by the codegen.
@@ -5861,12 +5888,11 @@ class HLoadString FINAL : public HInstruction {
// Load from the root table associated with the JIT compiled method.
kJitTableAddress,
- // Load from resolved strings array accessed through the class loaded from
- // the compiled method's own ArtMethod*. This is the default access type when
- // all other types are unavailable.
- kDexCacheViaMethod,
+ // Load using a simple runtime call. This is the fall-back load kind when
+ // the codegen is unable to use another appropriate kind.
+ kRuntimeCall,
- kLast = kDexCacheViaMethod,
+ kLast = kRuntimeCall,
};
HLoadString(HCurrentMethod* current_method,
@@ -5877,7 +5903,7 @@ class HLoadString FINAL : public HInstruction {
special_input_(HUserRecord<HInstruction*>(current_method)),
string_index_(string_index),
dex_file_(dex_file) {
- SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
+ SetPackedField<LoadKindField>(LoadKind::kRuntimeCall);
}
void SetLoadKind(LoadKind load_kind);
@@ -5912,8 +5938,7 @@ class HLoadString FINAL : public HInstruction {
// the dex cache and the string is not guaranteed to be there yet.
bool NeedsEnvironment() const OVERRIDE {
LoadKind load_kind = GetLoadKind();
- if (load_kind == LoadKind::kBootImageLinkTimeAddress ||
- load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+ if (load_kind == LoadKind::kBootImageLinkTimePcRelative ||
load_kind == LoadKind::kBootImageAddress ||
load_kind == LoadKind::kJitTableAddress) {
return false;
@@ -5922,7 +5947,7 @@ class HLoadString FINAL : public HInstruction {
}
bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
- return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+ return GetLoadKind() == LoadKind::kRuntimeCall;
}
bool CanBeNull() const OVERRIDE { return false; }
@@ -5956,7 +5981,7 @@ class HLoadString FINAL : public HInstruction {
void SetLoadKindInternal(LoadKind load_kind);
- // The special input is the HCurrentMethod for kDexCacheViaMethod.
+ // The special input is the HCurrentMethod for kRuntimeCall.
// For other load kinds it's empty or possibly some architecture-specific instruction
// for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
HUserRecord<HInstruction*> special_input_;
@@ -5976,7 +6001,6 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
GetLoadKind() == LoadKind::kBssEntry ||
- GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
// so use the GetInputRecords() from the base class to set the input record.
@@ -6495,6 +6519,145 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
};
+// A constructor fence orders all prior stores to fields that could be accessed via a final field of
+// the specified object(s), with respect to any subsequent store that might "publish"
+// (i.e. make visible) the specified object to another thread.
+//
+// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens
+// for all final fields (that were set) at the end of the invoked constructor.
+//
+// The constructor fence models the freeze actions for the final fields of an object
+// being constructed (semantically at the end of the constructor). Constructor fences
+// have a per-object affinity; two separate objects being constructed get two separate
+// constructor fences.
+//
+// (Note that if calling a super-constructor or forwarding to another constructor,
+// the freezes would happen at the end of *that* constructor being invoked).
+//
+// The memory model guarantees that when the object being constructed is "published" after
+// constructor completion (i.e. escapes the current thread via a store), then any final field
+// writes must be observable on other threads (once they observe that publication).
+//
+// Further, anything written before the freeze, and read by dereferencing through the final field,
+// must also be visible (so a final object field could itself reference an object with non-final fields;
+// yet the freeze must also extend to them).
+//
+// Constructor example:
+//
+// class HasFinal {
+// final int field; Optimizing IR for <init>()V:
+// HasFinal() {
+// field = 123; HInstanceFieldSet(this, HasFinal.field, 123)
+// // freeze(this.field); HConstructorFence(this)
+// } HReturn
+// }
+//
+// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of
+// already-initialized classes; in that case the allocation must act as a "default-initializer"
+// of the object which effectively writes the class pointer "final field".
+//
+// For example, we can model default-initialization as roughly the equivalent of the following:
+//
+// class Object {
+// private final Class header;
+// }
+//
+// Java code: Optimizing IR:
+//
+// T new_instance<T>() {
+// Object obj = allocate_memory(T.class.size); obj = HInvoke(art_quick_alloc_object, T)
+// obj.header = T.class; // header write is done by above call.
+// // freeze(obj.header) HConstructorFence(obj)
+// return (T)obj;
+// }
+//
+// See also:
+// * CompilerDriver::RequiresConstructorBarrier
+// * QuasiAtomic::ThreadFenceForConstructor
+//
+class HConstructorFence FINAL : public HVariableInputSizeInstruction {
+ // A fence has variable inputs because the inputs can be removed
+ // after the prepare_for_register_allocation phase.
+ // (TODO: In the future a fence could freeze multiple objects
+ // after merging two fences together.)
+ public:
+ // `fence_object` is the reference that needs to be protected for correct publication.
+ //
+ // It makes sense in the following situations:
+ // * For <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true).
+ // * For new-instance-like instructions, it's the return value (i.e. HNewInstance).
+ //
+ // After construction the `fence_object` becomes the 0th input.
+ // This is not an input in a real sense, but just a convenient place to stash the information
+ // about the associated object.
+ HConstructorFence(HInstruction* fence_object,
+ uint32_t dex_pc,
+ ArenaAllocator* arena)
+ // We strongly suspect there is not a more accurate way to describe the fine-grained reordering
+ // constraints described in the class header. We claim that these SideEffects constraints
+ // enforce a superset of the real constraints.
+ //
+ // The ordering described above is conservatively modeled with SideEffects as follows:
+ //
+ // * To prevent reordering of the publication stores:
+ // ----> "Reads of objects" is the initial SideEffect.
+ // * For every primitive final field store in the constructor:
+ // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect.
+ // * If there are any stores to reference final fields in the constructor:
+ // ----> Use a more conservative "AllReads" SideEffect because any stores to any references
+ // that are reachable from `fence_object` also need to be prevented from reordering
+ // (and we do not want to do alias analysis to figure out what those stores are).
+ //
+ // In the implementation, this initially starts out as an "all reads" side effect; this is an
+ // even more conservative approach than the one described above, and prevents all of the
+ // above reordering without analyzing any of the instructions in the constructor.
+ //
+ // If in a later phase we discover that there are no writes to reference final fields,
+ // we can refine the side effect to a smaller set of type reads (see above constraints).
+ : HVariableInputSizeInstruction(SideEffects::AllReads(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 1,
+ kArenaAllocConstructorFenceInputs) {
+ DCHECK(fence_object != nullptr);
+ SetRawInputAt(0, fence_object);
+ }
+
+ // The object associated with this constructor fence.
+ //
+ // (Note: This will be null after the prepare_for_register_allocation phase,
+ // as all constructor fence inputs are removed there).
+ HInstruction* GetFenceObject() const {
+ return InputAt(0);
+ }
+
+ // Find all the HConstructorFence uses (`fence_use`) for `this` and:
+ // - Delete `fence_use` from `this`'s use list.
+ // - Delete `this` from `fence_use`'s inputs list.
+ // - If the `fence_use` is dead, remove it from the graph.
+ //
+ // A fence is considered dead once it no longer has any uses
+ // and all of the inputs are dead.
+ //
+ // This must *not* be called during/after prepare_for_register_allocation,
+ // because that removes all the inputs to the fences but the fence is actually
+ // still considered live.
+ static void RemoveConstructorFences(HInstruction* instruction);
+
+ // Check if this constructor fence is protecting
+ // an HNewInstance or HNewArray that is also the immediate
+ // predecessor of `this`.
+ //
+ // Returns the associated HNewArray or HNewInstance,
+ // or null otherwise.
+ HInstruction* GetAssociatedAllocation();
+
+ DECLARE_INSTRUCTION(ConstructorFence);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HConstructorFence);
+};
+
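The ordering HConstructorFence models is roughly what a release fence before publication provides. A standalone C++ analogy (not ART code; all names here are invented for illustration) showing that the field store must be visible once the publishing store is observed:

    #include <atomic>
    #include <cassert>
    #include <thread>

    struct HasFinal {
      int field;
    };

    std::atomic<HasFinal*> published{nullptr};

    void construct_and_publish() {
      HasFinal* obj = new HasFinal();                       // leaked for brevity
      obj->field = 123;                                     // ~ HInstanceFieldSet(this, field, 123)
      std::atomic_thread_fence(std::memory_order_release);  // ~ HConstructorFence(this)
      published.store(obj, std::memory_order_relaxed);      // the "publish" store
    }

    void observe() {
      HasFinal* obj = published.load(std::memory_order_relaxed);
      if (obj != nullptr) {
        std::atomic_thread_fence(std::memory_order_acquire);
        assert(obj->field == 123);  // the pre-fence store must be visible
      }
    }

    int main() {
      std::thread t1(construct_and_publish);
      std::thread t2(observe);
      t1.join();
      t2.join();
      return 0;
    }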
class HMonitorOperation FINAL : public HTemplateInstruction<1> {
public:
enum class OperationKind {
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c6bfbcc7fb..075a816f3f 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,49 @@ class HIntermediateAddress FINAL : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
};
+// This instruction computes part of the array access offset (data and index offset).
+//
+// For array accesses the element address has the following structure:
+// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. Taking LDR/STR addressing modes into
+// account, the address part (CONST_OFFSET + index << ELEM_SHIFT) can be shared across array
+// accesses with the same data type and index. For example, in the following loop five accesses
+// can share the address computation:
+//
+// void foo(int[] a, int[] b, int[] c) {
+// for (i...) {
+// a[i] = a[i] + 5;
+// b[i] = b[i] + c[i];
+// }
+// }
+//
+// Note: as the instruction doesn't involve the base array address in its computation, it has no
+// side effects (in contrast to HIntermediateAddress).
+class HIntermediateAddressIndex FINAL : public HExpression<3> {
+ public:
+ HIntermediateAddressIndex(
+ HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+ SetRawInputAt(0, index);
+ SetRawInputAt(1, offset);
+ SetRawInputAt(2, shift);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetIndex() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+ HInstruction* GetShift() const { return InputAt(2); }
+
+ DECLARE_INSTRUCTION(IntermediateAddressIndex);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex);
+};
+
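As a numeric illustration of the shared term, the standalone sketch below uses hypothetical layout constants (the real data offset and element shift come from the runtime's array layout); only the array base differs between the accesses:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    int main() {
      constexpr uintptr_t kDataOffset = 12;  // CONST_OFFSET (hypothetical value)
      constexpr uintptr_t kElemShift = 2;    // ELEM_SHIFT for 4-byte int elements
      uintptr_t a_base = 0x1000;             // base_addr of array a (hypothetical)
      uintptr_t b_base = 0x2000;             // base_addr of array b (hypothetical)
      for (uintptr_t i = 0; i < 4; ++i) {
        // The part HIntermediateAddressIndex computes; shared by a[i], b[i], c[i].
        uintptr_t shared = kDataOffset + (i << kElemShift);
        std::printf("i=%" PRIuPTR " shared=%" PRIuPTR " &a[i]=%#" PRIxPTR " &b[i]=%#" PRIxPTR "\n",
                    i, shared, a_base + shared, b_base + shared);
      }
      return 0;
    }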
class HDataProcWithShifterOp FINAL : public HExpression<2> {
public:
enum OpKind {
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b52f..5dbe29b4fa 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -178,12 +178,17 @@ class HVecMemoryOperation : public HVecOperation {
size_t vector_length,
uint32_t dex_pc)
: HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
- alignment_(Primitive::ComponentSize(packed_type), 0) { }
+ alignment_(Primitive::ComponentSize(packed_type), 0) {
+ DCHECK_GE(number_of_inputs, 2u);
+ }
void SetAlignment(Alignment alignment) { alignment_ = alignment; }
Alignment GetAlignment() const { return alignment_; }
+ HInstruction* GetArray() const { return InputAt(0); }
+ HInstruction* GetIndex() const { return InputAt(1); }
+
DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
private:
@@ -451,13 +456,24 @@ class HVecMin FINAL : public HVecBinaryOperation {
HInstruction* right,
Primitive::Type packed_type,
size_t vector_length,
+ bool is_unsigned,
uint32_t dex_pc = kNoDexPc)
: HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
+ SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
}
+
+ bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); }
+
DECLARE_INSTRUCTION(VecMin);
+
private:
+ // Additional packed bits.
+ static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1;
+ static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
DISALLOW_COPY_AND_ASSIGN(HVecMin);
};
@@ -470,13 +486,24 @@ class HVecMax FINAL : public HVecBinaryOperation {
HInstruction* right,
Primitive::Type packed_type,
size_t vector_length,
+ bool is_unsigned,
uint32_t dex_pc = kNoDexPc)
: HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
+ SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
}
+
+ bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); }
+
DECLARE_INSTRUCTION(VecMax);
+
private:
+ // Additional packed bits.
+ static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1;
+ static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
DISALLOW_COPY_AND_ASSIGN(HVecMax);
};
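The is_unsigned bit is stored in the instruction's packed bit field rather than as a separate member. A minimal standalone sketch of that packed-flag scheme (invented class and bit budget, not the real HInstruction plumbing):

    #include <bitset>
    #include <cstddef>

    class FakeVecOp {
     public:
      // Pretend the base class already consumed this many packed bits.
      static constexpr std::size_t kNumberOfVectorOpPackedBits = 3;
      static constexpr std::size_t kFieldIsUnsigned = kNumberOfVectorOpPackedBits;
      static constexpr std::size_t kNumberOfPackedBits = kFieldIsUnsigned + 1;
      static constexpr std::size_t kMaxNumberOfPackedBits = 32;
      static_assert(kNumberOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");

      explicit FakeVecOp(bool is_unsigned) { SetPackedFlag<kFieldIsUnsigned>(is_unsigned); }
      bool IsUnsigned() const { return GetPackedFlag<kFieldIsUnsigned>(); }

     private:
      template <std::size_t kBit> void SetPackedFlag(bool value) { bits_[kBit] = value; }
      template <std::size_t kBit> bool GetPackedFlag() const { return bits_[kBit]; }

      std::bitset<kMaxNumberOfPackedBits> bits_;  // the real class packs this into one word
    };

    int main() {
      FakeVecOp unsigned_min(/* is_unsigned */ true);
      return unsigned_min.IsUnsigned() ? 0 : 1;
    }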
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 065c11eddb..e5ab00bce3 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -83,6 +83,7 @@
#include "jit/jit_code_cache.h"
#include "jni/quick/jni_compiler.h"
#include "licm.h"
+#include "load_store_analysis.h"
#include "load_store_elimination.h"
#include "loop_optimization.h"
#include "nodes.h"
@@ -465,7 +466,8 @@ static HOptimization* BuildOptimization(
const DexCompilationUnit& dex_compilation_unit,
VariableSizedHandleScope* handles,
SideEffectsAnalysis* most_recent_side_effects,
- HInductionVarAnalysis* most_recent_induction) {
+ HInductionVarAnalysis* most_recent_induction,
+ LoadStoreAnalysis* most_recent_lsa) {
std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
@@ -499,15 +501,18 @@ static HOptimization* BuildOptimization(
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
return new (arena) HInductionVarAnalysis(graph);
} else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
- return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
+ return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str());
} else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
return new (arena) IntrinsicsRecognizer(graph, stats);
} else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
CHECK(most_recent_side_effects != nullptr);
return new (arena) LICM(graph, *most_recent_side_effects, stats);
+ } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) {
+ return new (arena) LoadStoreAnalysis(graph);
} else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
CHECK(most_recent_side_effects != nullptr);
- return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+ CHECK(most_recent_lsa != nullptr);
+ return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa);
} else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
return new (arena) SideEffectsAnalysis(graph);
} else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
@@ -556,6 +561,7 @@ static ArenaVector<HOptimization*> BuildOptimizations(
// in the pass name list.
SideEffectsAnalysis* most_recent_side_effects = nullptr;
HInductionVarAnalysis* most_recent_induction = nullptr;
+ LoadStoreAnalysis* most_recent_lsa = nullptr;
ArenaVector<HOptimization*> ret(arena->Adapter());
for (const std::string& pass_name : pass_names) {
HOptimization* opt = BuildOptimization(
@@ -568,7 +574,8 @@ static ArenaVector<HOptimization*> BuildOptimizations(
dex_compilation_unit,
handles,
most_recent_side_effects,
- most_recent_induction);
+ most_recent_induction,
+ most_recent_lsa);
CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
ret.push_back(opt);
@@ -577,6 +584,8 @@ static ArenaVector<HOptimization*> BuildOptimizations(
most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+ } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) {
+ most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt);
}
}
return ret;
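The builder threads analysis results forward by remembering the most recent instance of each analysis it constructs, so a later consumer (here LoadStoreElimination) picks up the last LoadStoreAnalysis that ran. A stripped-down sketch of that bookkeeping, with invented pass types standing in for the real ones:

    #include <cassert>
    #include <memory>
    #include <string>
    #include <vector>

    struct Pass { virtual ~Pass() = default; };
    struct SideEffectsPass : Pass {};
    struct LoadStoreAnalysisPass : Pass {};
    struct LoadStoreEliminationPass : Pass {
      LoadStoreEliminationPass(SideEffectsPass* se, LoadStoreAnalysisPass* lsa) {
        assert(se != nullptr && lsa != nullptr);  // consumers require their producers
        (void)se; (void)lsa;
      }
    };

    std::vector<std::unique_ptr<Pass>> BuildPasses(const std::vector<std::string>& names) {
      std::vector<std::unique_ptr<Pass>> passes;
      SideEffectsPass* most_recent_side_effects = nullptr;
      LoadStoreAnalysisPass* most_recent_lsa = nullptr;
      for (const std::string& name : names) {
        if (name == "side_effects") {
          auto pass = std::make_unique<SideEffectsPass>();
          most_recent_side_effects = pass.get();  // remembered for later consumers
          passes.push_back(std::move(pass));
        } else if (name == "load_store_analysis") {
          auto pass = std::make_unique<LoadStoreAnalysisPass>();
          most_recent_lsa = pass.get();
          passes.push_back(std::move(pass));
        } else if (name == "load_store_elimination") {
          passes.push_back(std::make_unique<LoadStoreEliminationPass>(most_recent_side_effects,
                                                                      most_recent_lsa));
        }
      }
      return passes;
    }

    int main() {
      BuildPasses({"side_effects", "load_store_analysis", "load_store_elimination"});
      return 0;
    }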
@@ -638,11 +647,14 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
new (arena) arm::InstructionSimplifierArm(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+ HInstructionScheduling* scheduling =
+ new (arena) HInstructionScheduling(graph, instruction_set, codegen);
HOptimization* arm_optimizations[] = {
simplifier,
side_effects,
gvn,
- fixups
+ fixups,
+ scheduling,
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
break;
@@ -760,7 +772,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
graph, stats, "dead_code_elimination$final");
HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
- InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
+ InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(
+ graph, codegen, driver, stats);
HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
HConstantFolding* fold2 = new (arena) HConstantFolding(
graph, "constant_folding$after_inlining");
@@ -774,15 +787,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
- LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
+ LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
+ LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa);
HSharpening* sharpening = new (arena) HSharpening(
graph, codegen, dex_compilation_unit, driver, handles);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
- graph, codegen, stats, "instruction_simplifier$after_inlining");
+ graph, codegen, driver, stats, "instruction_simplifier$after_inlining");
InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
- graph, codegen, stats, "instruction_simplifier$after_bce");
+ graph, codegen, driver, stats, "instruction_simplifier$after_bce");
InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
- graph, codegen, stats, "instruction_simplifier$before_codegen");
+ graph, codegen, driver, stats, "instruction_simplifier$before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
@@ -814,6 +828,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
fold3, // evaluates code generated by dynamic bce
simplify3,
side_effects2,
+ lsa,
lse,
cha_guard,
dce3,
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index a0fdde169d..bce54bf49a 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -58,10 +58,22 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
DCHECK(base_ != nullptr);
}
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ // If this is an invoke with PC-relative pointer to a method,
+ // we need to add the base as the special input.
+ if (invoke->GetMethodLoadKind() ==
+ HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative &&
+ !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) {
+ InitializePCRelativeBasePointer();
+ // Add the special argument base to the method.
+ DCHECK(!invoke->HasCurrentMethodInput());
+ invoke->AddSpecialInput(base_);
+ }
+ }
+
void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
switch (load_kind) {
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kBssEntry:
@@ -77,7 +89,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
void VisitLoadString(HLoadString* load_string) OVERRIDE {
HLoadString::LoadKind load_kind = load_string->GetLoadKind();
switch (load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBssEntry:
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a1c916f43a..2743df9dcf 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -205,13 +205,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
// method pointer from the invoke.
if (invoke_static_or_direct != nullptr &&
invoke_static_or_direct->HasCurrentMethodInput()) {
- DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+ DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind());
return;
}
bool base_added = false;
if (invoke_static_or_direct != nullptr &&
- invoke_static_or_direct->HasPcRelativeDexCache() &&
+ invoke_static_or_direct->HasPcRelativeMethodLoadKind() &&
!IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
// Add the extra parameter.
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 66bfea9860..aa42fd647b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -167,6 +167,45 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
}
}
+void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ // Trivially remove redundant HConstructorFence when it immediately follows an HNewInstance
+ // to an uninitialized class. In this special case, the art_quick_alloc_object_resolved
+ // will already have the 'dmb' which is strictly stronger than an HConstructorFence.
+ //
+ // The instruction builder always emits "x = HNewInstance; HConstructorFence(x)" so this
+ // is effectively pattern-matching that particular case and undoing the redundancy the builder
+ // had introduced.
+ //
+ // TODO: Move this to a separate pass.
+ HInstruction* allocation_inst = constructor_fence->GetAssociatedAllocation();
+ if (allocation_inst != nullptr && allocation_inst->IsNewInstance()) {
+ HNewInstance* new_inst = allocation_inst->AsNewInstance();
+ // This relies on the entrypoint already being set to the more optimized version;
+ // as that happens in this pass, this redundancy removal also cannot happen any earlier.
+ if (new_inst != nullptr && new_inst->GetEntrypoint() == kQuickAllocObjectResolved) {
+ // If this was done in an earlier pass, we would want to match that `previous` was an input
+ // to the `constructor_fence`. However, since this pass removes the inputs to the fence,
+ // we can ignore the inputs and just remove the instruction from its block.
+ DCHECK_EQ(1u, constructor_fence->InputCount());
+ // TODO: GetAssociatedAllocation should not care about multiple inputs
+ // if we are in prepare_for_register_allocation pass only.
+ constructor_fence->GetBlock()->RemoveInstruction(constructor_fence);
+ return;
+ // TODO: actually remove the dmb from the .S entrypoints (initialized variants only).
+ }
+
+ // HNewArray does not need this check because the art_quick_alloc_array does not itself
+ // have a dmb in any normal situation (i.e. the array class is never exactly in the
+ // "resolved" state). If the array class is not yet loaded, it will always go from
+ // Unloaded->Initialized state.
+ }
+
+ // Remove all the inputs to the constructor fence;
+ // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a
+ // LocationSummary in the LocationsBuilder.
+ constructor_fence->RemoveAllInputs();
+}
+
void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
if (invoke->IsStaticWithExplicitClinitCheck()) {
HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 7ffbe44ef6..395d4ba2ee 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -43,6 +43,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
void VisitArraySet(HArraySet* instruction) OVERRIDE;
void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
void VisitCondition(HCondition* condition) OVERRIDE;
+ void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 87f709f63d..2fd7b03151 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -20,7 +20,7 @@
#include "linear_order.h"
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
namespace art {
@@ -1968,8 +1968,7 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
allocator_->Adapter(kArenaAllocRegisterAllocator));
- for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
- LiveInterval* parent_interval = *it;
+ for (LiveInterval* parent_interval : *intervals) {
DCHECK(parent_interval->IsParent());
DCHECK(!parent_interval->HasSpillSlot());
size_t start = parent_interval->GetStart();
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index d65d20cf43..320f01a727 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -23,6 +23,10 @@
#include "scheduler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
namespace art {
void SchedulingGraph::AddDependency(SchedulingNode* node,
@@ -264,10 +268,11 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description,
// Start the dot graph. Use an increasing index for easier differentiation.
output << "digraph G {\n";
for (const auto& entry : nodes_map_) {
- DumpAsDotNode(output, entry.second);
+ SchedulingNode* node = entry.second;
+ DumpAsDotNode(output, node);
}
// Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
- for (auto node : initial_candidates) {
+ for (SchedulingNode* node : initial_candidates) {
const HInstruction* instruction = node->GetInstruction();
output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
<< "[label=\"" << node->GetLatency() << "\",dir=back]\n";
@@ -580,28 +585,39 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
bool schedule_randomly) {
+#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
+ // Phase-local allocator that allocates scheduler internal data structures like
+ // scheduling nodes, internal nodes map, dependencies, etc.
+ ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ RandomSchedulingNodeSelector random_selector;
+ SchedulingNodeSelector* selector = schedule_randomly
+ ? static_cast<SchedulingNodeSelector*>(&random_selector)
+ : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+#else
// Avoid compilation error when compiling for unsupported instruction set.
UNUSED(only_optimize_loop_blocks);
UNUSED(schedule_randomly);
+#endif
switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
case kArm64: {
- // Phase-local allocator that allocates scheduler internal data structures like
- // scheduling nodes, internel nodes map, dependencies, etc.
- ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
-
- CriticalPathSchedulingNodeSelector critical_path_selector;
- RandomSchedulingNodeSelector random_selector;
- SchedulingNodeSelector* selector = schedule_randomly
- ? static_cast<SchedulingNodeSelector*>(&random_selector)
- : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
-
arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
}
#endif
+#if defined(ART_ENABLE_CODEGEN_arm)
+ case kThumb2:
+ case kArm: {
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
+ arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor);
+ scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+ scheduler.Schedule(graph_);
+ break;
+ }
+#endif
default:
break;
}
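The selector hoisted out of the arm64-only block is a strategy object now shared by both the arm64 and ARM schedulers: the critical-path selector picks the ready node with the longest remaining path, while the random selector exists for testing. A standalone sketch of that shape (toy types and signatures, not the ART classes):

    #include <algorithm>
    #include <cstddef>
    #include <cstdlib>
    #include <vector>

    struct Node { int critical_path; };

    class SchedulingNodeSelector {
     public:
      virtual ~SchedulingNodeSelector() = default;
      virtual Node* PopHighestPriority(std::vector<Node*>* candidates) = 0;
    };

    class CriticalPathSelector : public SchedulingNodeSelector {
     public:
      Node* PopHighestPriority(std::vector<Node*>* candidates) override {
        auto it = std::max_element(
            candidates->begin(), candidates->end(),
            [](Node* a, Node* b) { return a->critical_path < b->critical_path; });
        Node* chosen = *it;
        candidates->erase(it);
        return chosen;
      }
    };

    class RandomSelector : public SchedulingNodeSelector {
     public:
      Node* PopHighestPriority(std::vector<Node*>* candidates) override {
        std::size_t i = static_cast<std::size_t>(std::rand()) % candidates->size();
        Node* chosen = (*candidates)[i];
        candidates->erase(candidates->begin() + i);
        return chosen;
      }
    };

    int main() {
      Node a{5}, b{9};
      std::vector<Node*> ready{&a, &b};
      CriticalPathSelector critical_path_selector;
      RandomSelector random_selector;
      bool schedule_randomly = false;  // mirrors the schedule_randomly flag
      SchedulingNodeSelector* selector = schedule_randomly
          ? static_cast<SchedulingNodeSelector*>(&random_selector)
          : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
      return selector->PopHighestPriority(&ready) == &b ? 0 : 1;
    }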
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index 9236a0e4fa..73e8087cd0 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -23,6 +23,7 @@
#include "driver/compiler_driver.h"
#include "nodes.h"
#include "optimization.h"
+#include "code_generator.h"
namespace art {
@@ -469,8 +470,9 @@ inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction
class HInstructionScheduling : public HOptimization {
public:
- HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
+ HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr)
: HOptimization(graph, kInstructionScheduling),
+ codegen_(cg),
instruction_set_(instruction_set) {}
void Run() {
@@ -480,6 +482,7 @@ class HInstructionScheduling : public HOptimization {
static constexpr const char* kInstructionScheduling = "scheduler";
+ CodeGenerator* const codegen_;
const InstructionSet instruction_set_;
private:
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
new file mode 100644
index 0000000000..832a7e1571
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -0,0 +1,827 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
+#include "common_arm.h"
+#include "mirror/array-inl.h"
+#include "scheduler_arm.h"
+
+namespace art {
+namespace arm {
+
+using helpers::Int32ConstantFrom;
+using helpers::Uint64ConstantFrom;
+
+void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
+ // so a bubble (kArmNopLatency) is added to represent the internal carry flag
+ // dependency inside these pairs.
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
+ HandleBinaryOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
+ HandleBinaryOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmMulFloatingPointLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmMulIntegerLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
+ HandleBitwiseOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
+ HandleBitwiseOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
+ HandleBitwiseOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimInt:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong: {
+ // HandleLongRotate
+ HInstruction* rhs = instr->GetRight();
+ if (rhs->IsConstant()) {
+ uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+ if (rot != 0u) {
+ last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+ } else {
+ last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
+ Primitive::Type type = instr->GetResultType();
+ HInstruction* rhs = instr->GetRight();
+ switch (type) {
+ case Primitive::kPrimInt:
+ if (!rhs->IsConstant()) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ if (!rhs->IsConstant()) {
+ last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
+ } else {
+ uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+ if (shift_value == 1 || shift_value >= 32) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ }
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+ switch (instr->GetLeft()->GetType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 4 * kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
+ Primitive::Type type = instr->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
+ if (internal_latency) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmDataProcWithShifterOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ if (kind == HInstruction::kAdd) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else if (kind == HInstruction::kSub) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+ const uint32_t shift_value = instruction->GetShiftAmount();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+
+ if (shift_value >= 32) {
+ // Different shift types actually generate similar code here, so there is no need to
+ // differentiate shift types the way the codegen pass does; this also avoids handling
+ // shift types from different ARM backends.
+ HandleGenerateDataProc(instruction);
+ } else {
+ DCHECK_GT(shift_value, 1U);
+ DCHECK_LT(shift_value, 32U);
+
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction();
+ } else {
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+ HandleGenerateDataProc(instruction);
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
+ const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+ if (instruction->GetType() == Primitive::kPrimInt) {
+ DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+ HandleGenerateDataProcInstruction();
+ } else {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ HandleGenerateDataProc(instruction);
+ } else {
+ HandleGenerateLongDataProc(instruction);
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+ // Although the code generated is a simple `add` instruction, we found through empirical results
+ // that spacing it from its use in memory accesses was beneficial.
+ last_visited_internal_latency_ = kArmNopLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
+}
+
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArmMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ HInstruction* index = instruction->InputAt(1);
+
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ += kArmMemoryLoadLatency;
+ }
+ if (index->IsConstant()) {
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ +=
+ kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ } else {
+ last_visited_latency_ += kArmMemoryLoadLatency;
+ }
+ } else {
+ if (has_intermediate_address) {
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ +=
+ kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ } else {
+ last_visited_latency_ += kArmMemoryLoadLatency;
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
+ } else {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ if (has_intermediate_address) {
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+ last_visited_internal_latency_ = kArmMemoryLoadLatency;
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
+ HInstruction* index = instruction->InputAt(1);
+ Primitive::Type value_type = instruction->GetComponentType();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+
+ switch (value_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ } else {
+ if (has_intermediate_address) {
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ if (instruction->InputAt(2)->IsNullConstant()) {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ } else {
+ // Following the exact instructions of runtime type checks is too complicated;
+ // just give it a single, conservatively slow latency.
+ last_visited_latency_ = kArmRuntimeTypeCheckLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable type " << value_type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
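These latency buckets mirror the code shapes the backend emits for a constant divisor: no code for 0 (a preceding DivZeroCheck throws first), a move/negate for +/-1, a short sign-bias-and-shift sequence for powers of two, and a magic-number multiply otherwise. A standalone sketch of the power-of-two case (the roughly-three-integer-op path); it assumes arithmetic right shift of negative values, which is what the ARM sequence relies on:

    #include <cassert>

    // Signed division by 2^k, rounding toward zero, via the usual
    // sign-bias-then-shift sequence (roughly ASR + ADD + ASR on ARM).
    int DivByPowerOfTwo(int x, int k) {
      int bias = (x < 0) ? ((1 << k) - 1) : 0;  // add 2^k - 1 only for negative dividends
      return (x + bias) >> k;                   // arithmetic shift assumed for negative sums
    }

    int main() {
      assert(DivByPowerOfTwo(20, 2) == 5);
      assert(DivByPowerOfTwo(-20, 2) == -5);
      assert(DivByPowerOfTwo(-7, 2) == -1);  // rounds toward zero, unlike a bare shift
      return 0;
    }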
+void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
+ Primitive::Type type = instruction->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HInstruction* rhs = instruction->GetRight();
+ if (rhs->IsConstant()) {
+ int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+ HandleDivRemConstantIntegralLatencies(imm);
+ } else {
+ last_visited_latency_ = kArmDivIntegerLatency;
+ }
+ break;
+ }
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmDivFloatLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmDivDoubleLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmLoadStringInternalLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
+ if (instruction->IsStringAlloc()) {
+ last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
+ } else {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ }
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
+ Primitive::Type type = instruction->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HInstruction* rhs = instruction->GetRight();
+ if (rhs->IsConstant()) {
+ int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+ HandleDivRemConstantIntegralLatencies(imm);
+ } else {
+ last_visited_internal_latency_ = kArmDivIntegerLatency;
+ last_visited_latency_ = kArmMulIntegerLatency;
+ }
+ break;
+ }
+ default:
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ DCHECK(codegen_ != nullptr);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+
+ case Primitive::kPrimNot:
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+
+ case Primitive::kPrimDouble:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ =
+ kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+ }
+
+ if (is_volatile) {
+ last_visited_internal_latency_ += kArmMemoryBarrierLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+ DCHECK(codegen_ != nullptr);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ if (is_volatile) {
+ last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmMemoryBarrierLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
+ }
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ =
+ kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+
+ case Primitive::kPrimDouble:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency +
+ kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ DCHECK((block->GetLoopInformation() != nullptr) ||
+ (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
+ Primitive::Type result_type = instr->GetResultType();
+ Primitive::Type input_type = instr->GetInputType();
+
+ switch (result_type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX
+ break;
+
+ case Primitive::kPrimInt:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ last_visited_latency_ = kArmIntegerOpLatency; // MOV
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ // MOV and extension
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ // invokes runtime
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ // invokes runtime
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimDouble:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
new file mode 100644
index 0000000000..897e97da49
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+
+#ifdef ART_USE_OLD_ARM_BACKEND
+#include "code_generator_arm.h"
+#else
+#include "code_generator_arm_vixl.h"
+#endif
+#include "scheduler.h"
+
+namespace art {
+namespace arm {
+#ifdef ART_USE_OLD_ARM_BACKEND
+typedef CodeGeneratorARM CodeGeneratorARMType;
+#else
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+#endif
+
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned based on performance experiments and
+// automatic tuning using a differential evolution approach on various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
+
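Each visitor callback reports two numbers: last_visited_internal_latency_ for work hidden inside the expansion, and last_visited_latency_ for the instruction whose result users actually wait on. Using the constants above and VisitMul from scheduler_arm.cc, a tiny sketch of the arithmetic for a long multiply, assuming the scheduler simply sums the two when it lengthens the critical path:

    #include <cstdint>
    #include <cstdio>

    // Mirrors of two constants from the list above.
    constexpr uint32_t kArmIntegerOpLatency = 2;
    constexpr uint32_t kArmMulIntegerLatency = 6;

    int main() {
      // VisitMul for Primitive::kPrimLong sets:
      uint32_t internal_latency = 3 * kArmMulIntegerLatency;  // hidden multiply work
      uint32_t result_latency = kArmIntegerOpLatency;         // the result-producing op
      std::printf("long HMul: internal=%u result=%u total=%u\n",
                  internal_latency, result_latency, internal_latency + result_latency);
      return 0;
    }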
+class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+ public:
+ explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+ : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(Add , unused) \
+ M(Sub , unused) \
+ M(And , unused) \
+ M(Or , unused) \
+ M(Ror , unused) \
+ M(Xor , unused) \
+ M(Shl , unused) \
+ M(Shr , unused) \
+ M(UShr , unused) \
+ M(Mul , unused) \
+ M(Div , unused) \
+ M(Condition , unused) \
+ M(Compare , unused) \
+ M(BoundsCheck , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceFieldSet , unused) \
+ M(InstanceOf , unused) \
+ M(Invoke , unused) \
+ M(LoadString , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(StaticFieldSet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused) \
+ M(IntermediateAddressIndex, unused) \
+ M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) OVERRIDE;
+
+ FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ void HandleBinaryOperationLantencies(HBinaryOperation* instr);
+ void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
+ void HandleShiftLatencies(HBinaryOperation* instr);
+ void HandleDivRemConstantIntegralLatencies(int32_t imm);
+ void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleGenerateDataProcInstruction(bool internal_latency = false);
+ void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+ void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+ // The latency setting for each HInstruction depends on how the CodeGenerator may generate code;
+ // latency visitors may query the CodeGenerator for that information to set latencies accurately.
+ CodeGeneratorARMType* codegen_;
+};
+
+class HSchedulerARM : public HScheduler {
+ public:
+ HSchedulerARM(ArenaAllocator* arena,
+ SchedulingNodeSelector* selector,
+ SchedulingLatencyVisitorARM* arm_latency_visitor)
+ : HScheduler(arena, arm_latency_visitor, selector) {}
+ ~HSchedulerARM() OVERRIDE {}
+
+ bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) case \
+ HInstruction::InstructionKind::k##type:
+ switch (instruction->GetKind()) {
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+ return true;
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
+ return true;
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+#undef CASE_INSTRUCTION_KIND
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
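For readers unfamiliar with the scheduler plumbing, the two fields the visitors above fill in feed the node cost roughly as sketched below. This is an illustrative, hypothetical snippet (FakeNode and TotalNodeLatency are made-up names), not code from this change.

#include <cstdint>

// Illustrative only: the internal latency models the extra instructions a visitor
// expects the code generator to emit before the final one; the plain latency is
// how long consumers of the result wait after that final instruction.
struct FakeNode {
  uint32_t internal_latency;
  uint32_t latency;
};

uint32_t TotalNodeLatency(const FakeNode& node) {
  return node.internal_latency + node.latency;
}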
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 558dcc4cbc..83b487fb5b 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -16,6 +16,7 @@
#include "scheduler_arm64.h"
#include "code_generator_utils.h"
+#include "mirror/array-inl.h"
namespace art {
namespace arm64 {
@@ -43,6 +44,13 @@ void SchedulingLatencyVisitorARM64::VisitIntermediateAddress(
last_visited_latency_ = kArm64IntegerOpLatency + 2;
}
+void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) {
+ // Although the code generated is a simple `add` instruction, we found through empirical results
+ // that spacing it from its use in memory accesses was beneficial.
+ last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2;
+}
+
void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArm64MulIntegerLatency;
}
@@ -192,5 +200,148 @@ void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr)
}
}
+void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation* instr) {
+ if (Primitive::IsFloatingPointType(instr->GetPackedType())) {
+ last_visited_latency_ = kArm64SIMDFloatingPointOpLatency;
+ } else {
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
+ HVecReplicateScalar* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDReplicateOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) {
+ if (instr->GetPackedType() == Primitive::kPrimBoolean) {
+ last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency;
+ }
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) {
+ if (Primitive::IsFloatingPointType(instr->GetPackedType())) {
+ last_visited_latency_ = kArm64SIMDMulFloatingPointLatency;
+ } else {
+ last_visited_latency_ = kArm64SIMDMulIntegerLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) {
+ if (instr->GetPackedType() == Primitive::kPrimFloat) {
+ last_visited_latency_ = kArm64SIMDDivFloatLatency;
+ } else {
+ DCHECK(instr->GetPackedType() == Primitive::kPrimDouble);
+ last_visited_latency_ = kArm64SIMDDivDoubleLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
+ HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::HandleVecAddress(
+ HVecMemoryOperation* instruction,
+ size_t size ATTRIBUTE_UNUSED) {
+ HInstruction* index = instruction->InputAt(1);
+ if (!index->IsConstant()) {
+ last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) {
+ last_visited_internal_latency_ = 0;
+ size_t size = Primitive::ComponentSize(instr->GetPackedType());
+
+ if (instr->GetPackedType() == Primitive::kPrimChar
+ && mirror::kUseStringCompression
+ && instr->IsStringCharAt()) {
+ // Set latencies for the uncompressed case.
+ last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency;
+ HandleVecAddress(instr, size);
+ last_visited_latency_ = kArm64SIMDMemoryLoadLatency;
+ } else {
+ HandleVecAddress(instr, size);
+ last_visited_latency_ = kArm64SIMDMemoryLoadLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) {
+ last_visited_internal_latency_ = 0;
+ size_t size = Primitive::ComponentSize(instr->GetPackedType());
+ HandleVecAddress(instr, size);
+ last_visited_latency_ = kArm64SIMDMemoryStoreLatency;
+}
+
} // namespace arm64
} // namespace art
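Summing up the new ARM64 SIMD memory visitors, the reported cost of a vector load can be condensed as below. This is a sketch that assumes scheduler_arm64.h is included; the helper name is hypothetical and merely mirrors HandleVecAddress and VisitVecLoad above.

// Sketch only: the non-constant-index address computation and the optional
// string-compression check count as internal latency; the SIMD load itself
// is the visible latency.
uint32_t VecLoadLatencySketch(bool index_is_constant, bool compressed_char_at) {
  uint32_t internal = index_is_constant ? 0u : kArm64DataProcWithShifterOpLatency;
  if (compressed_char_at) {
    internal += kArm64MemoryLoadLatency + kArm64BranchLatency;
  }
  return internal + kArm64SIMDMemoryLoadLatency;
}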
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 7a33720655..63d5b7d6b6 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -42,6 +42,18 @@ static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
+
+static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
+static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
+static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
+static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
+static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
+static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
+static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
+static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
+static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
+static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
public:
@@ -52,29 +64,54 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
- M(ArrayGet , unused) \
- M(ArrayLength , unused) \
- M(ArraySet , unused) \
- M(BinaryOperation , unused) \
- M(BoundsCheck , unused) \
- M(Div , unused) \
- M(InstanceFieldGet , unused) \
- M(InstanceOf , unused) \
- M(Invoke , unused) \
- M(LoadString , unused) \
- M(Mul , unused) \
- M(NewArray , unused) \
- M(NewInstance , unused) \
- M(Rem , unused) \
- M(StaticFieldGet , unused) \
- M(SuspendCheck , unused) \
- M(TypeConversion , unused)
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(BinaryOperation , unused) \
+ M(BoundsCheck , unused) \
+ M(Div , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceOf , unused) \
+ M(Invoke , unused) \
+ M(LoadString , unused) \
+ M(Mul , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused) \
+ M(VecReplicateScalar , unused) \
+ M(VecSetScalars , unused) \
+ M(VecSumReduce , unused) \
+ M(VecCnv , unused) \
+ M(VecNeg , unused) \
+ M(VecAbs , unused) \
+ M(VecNot , unused) \
+ M(VecAdd , unused) \
+ M(VecHalvingAdd , unused) \
+ M(VecSub , unused) \
+ M(VecMul , unused) \
+ M(VecDiv , unused) \
+ M(VecMin , unused) \
+ M(VecMax , unused) \
+ M(VecAnd , unused) \
+ M(VecAndNot , unused) \
+ M(VecOr , unused) \
+ M(VecXor , unused) \
+ M(VecShl , unused) \
+ M(VecShr , unused) \
+ M(VecUShr , unused) \
+ M(VecMultiplyAccumulate, unused) \
+ M(VecLoad , unused) \
+ M(VecStore , unused)
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
M(BitwiseNegatedRight, unused) \
M(MultiplyAccumulate, unused) \
M(IntermediateAddress, unused) \
+ M(IntermediateAddressIndex, unused) \
M(DataProcWithShifterOp, unused)
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
@@ -85,6 +122,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ void HandleSimpleArithmeticSIMD(HVecOperation* instr);
+ void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};
class HSchedulerARM64 : public HScheduler {
@@ -101,6 +142,8 @@ class HSchedulerARM64 : public HScheduler {
return true;
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
return true;
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
+ return true;
default:
return HScheduler::IsSchedulable(instruction);
}
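To make the macro-generated switch in IsSchedulable concrete, a hand-expanded excerpt would look like the hypothetical function below; kVecAdd and kVecLoad are just two of the entries, and the real body is produced by CASE_INSTRUCTION_KIND over the full lists.

// Hand-written illustration, not generated code.
bool IsSchedulableExpandedExcerpt(const HInstruction* instruction) {
  switch (instruction->GetKind()) {
    case HInstruction::InstructionKind::kVecAdd:   // from FOR_EACH_SCHEDULED_COMMON_INSTRUCTION
    case HInstruction::InstructionKind::kVecLoad:  // likewise
      return true;                                 // handled by SchedulingLatencyVisitorARM64
    default:
      return false;  // the real code falls back to HScheduler::IsSchedulable(instruction)
  }
}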
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 31d13e2a26..d87600aa5e 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -28,6 +28,10 @@
#include "scheduler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
namespace art {
// Return all combinations of ISA and code generator that are executable on
@@ -56,7 +60,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
#endif
};
- for (auto test_config : test_config_candidates) {
+ for (const CodegenTargetConfig& test_config : test_config_candidates) {
if (CanExecute(test_config.GetInstructionSet())) {
v.push_back(test_config);
}
@@ -65,133 +69,151 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class SchedulerTest : public CommonCompilerTest {};
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-TEST_F(SchedulerTest, DependencyGraph) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
- HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry);
- graph->AddBlock(block1);
- graph->SetEntryBlock(entry);
-
- // entry:
- // array ParameterValue
- // c1 IntConstant
- // c2 IntConstant
- // block1:
- // add1 Add [c1, c2]
- // add2 Add [add1, c2]
- // mul Mul [add1, add2]
- // div_check DivZeroCheck [add2] (env: add2, mul)
- // div Div [add1, div_check]
- // array_get1 ArrayGet [array, add1]
- // array_set1 ArraySet [array, add1, add2]
- // array_get2 ArrayGet [array, add1]
- // array_set2 ArraySet [array, add1, add2]
-
- HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- Primitive::kPrimNot);
- HInstruction* c1 = graph->GetIntConstant(1);
- HInstruction* c2 = graph->GetIntConstant(10);
- HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
- HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
- HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
- HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
- HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
- HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
- HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
- HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
- HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-
- DCHECK(div_check->CanThrow());
-
- entry->AddInstruction(array);
-
- HInstruction* block_instructions[] = {add1,
- add2,
- mul,
- div_check,
- div,
- array_get1,
- array_set1,
- array_get2,
- array_set2};
- for (auto instr : block_instructions) {
- block1->AddInstruction(instr);
+class SchedulerTest : public CommonCompilerTest {
+ public:
+ SchedulerTest() : pool_(), allocator_(&pool_) {
+ graph_ = CreateGraph(&allocator_);
}
- HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
- 2,
- graph->GetArtMethod(),
+ // Build the scheduling graph and run target-specific scheduling on it.
+ void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->AddBlock(block1);
+ graph_->SetEntryBlock(entry);
+
+ // entry:
+ // array ParameterValue
+ // c1 IntConstant
+ // c2 IntConstant
+ // block1:
+ // add1 Add [c1, c2]
+ // add2 Add [add1, c2]
+ // mul Mul [add1, add2]
+ // div_check DivZeroCheck [add2] (env: add2, mul)
+ // div Div [add1, div_check]
+ // array_get1 ArrayGet [array, add1]
+ // array_set1 ArraySet [array, add1, add2]
+ // array_get2 ArrayGet [array, add1]
+ // array_set2 ArraySet [array, add1, add2]
+
+ HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
0,
- div_check);
- div_check->SetRawEnvironment(environment);
- environment->SetRawEnvAt(0, add2);
- add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
- environment->SetRawEnvAt(1, mul);
- mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
-
- ArenaAllocator* arena = graph->GetArena();
- CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
- SchedulingGraph scheduling_graph(&scheduler, arena);
- // Instructions must be inserted in reverse order into the scheduling graph.
- for (auto instr : ReverseRange(block_instructions)) {
- scheduling_graph.AddNode(instr);
+ Primitive::kPrimNot);
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* c2 = graph_->GetIntConstant(10);
+ HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2);
+ HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2);
+ HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2);
+ HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0);
+ HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+ HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+ HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+
+ DCHECK(div_check->CanThrow());
+
+ entry->AddInstruction(array);
+
+ HInstruction* block_instructions[] = {add1,
+ add2,
+ mul,
+ div_check,
+ div,
+ array_get1,
+ array_set1,
+ array_get2,
+ array_set2};
+ for (HInstruction* instr : block_instructions) {
+ block1->AddInstruction(instr);
+ }
+
+ HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_,
+ 2,
+ graph_->GetArtMethod(),
+ 0,
+ div_check);
+ div_check->SetRawEnvironment(environment);
+ environment->SetRawEnvAt(0, add2);
+ add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+ environment->SetRawEnvAt(1, mul);
+ mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+ SchedulingGraph scheduling_graph(scheduler, graph_->GetArena());
+ // Instructions must be inserted in reverse order into the scheduling graph.
+ for (HInstruction* instr : ReverseRange(block_instructions)) {
+ scheduling_graph.AddNode(instr);
+ }
+
+ // Should not have dependencies across basic blocks.
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+ // Define-use dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+ // Read and write dependencies
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+ // Env dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+ // CanThrow.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+
+ // Exercise the code path of target specific scheduler and SchedulingLatencyVisitor.
+ scheduler->Schedule(graph_);
}
- // Should not have dependencies cross basic blocks.
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
-
- // Define-use dependency.
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
-
- // Read and write dependencies
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
-
- // Env dependency.
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
- ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
-
- // CanThrow.
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+ void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ HGraph* graph = CreateCFG(&allocator_, data);
+
+ // Schedule the graph randomly.
+ HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+ scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+
+ RunCode(target_config,
+ graph,
+ [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+ has_result, expected);
+ }
+ }
+
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ HGraph* graph_;
+};
+
+#if defined(ART_ENABLE_CODEGEN_arm64)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
+ TestBuildDependencyGraphAndSchedule(&scheduler);
}
#endif
-static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
- bool has_result,
- int expected) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateCFG(&arena, data);
-
- // Schedule the graph randomly.
- HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
- scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
-
- RunCode(target_config,
- graph,
- [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
- has_result, expected);
- }
+#if defined(ART_ENABLE_CODEGEN_arm)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
+ arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
+ TestBuildDependencyGraphAndSchedule(&scheduler);
}
+#endif
TEST_F(SchedulerTest, RandomScheduling) {
//
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index eedaf6e67e..106b709eda 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -16,6 +16,7 @@
#include "sharpening.h"
+#include "art_method-inl.h"
#include "base/casts.h"
#include "base/enums.h"
#include "class_linker.h"
@@ -41,7 +42,9 @@ void HSharpening::Run() {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
if (instruction->IsInvokeStaticOrDirect()) {
- SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
+ SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(),
+ codegen_,
+ compiler_driver_);
} else if (instruction->IsLoadString()) {
ProcessLoadString(instruction->AsLoadString());
}
@@ -56,7 +59,7 @@ static bool IsInBootImage(ArtMethod* method) {
const std::vector<gc::space::ImageSpace*>& image_spaces =
Runtime::Current()->GetHeap()->GetBootImageSpaces();
for (gc::space::ImageSpace* image_space : image_spaces) {
- const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
+ const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection();
if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
return true;
}
@@ -68,9 +71,21 @@ static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options)
return IsInBootImage(method) && !options.GetCompilePic();
}
+static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) {
+ DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
+ if (!compiler_driver->GetSupportBootImageFixup()) {
+ return false;
+ }
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = method->GetDeclaringClass();
+ DCHECK(klass != nullptr);
+ const DexFile& dex_file = klass->GetDexFile();
+ return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex()));
+}
void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
- CodeGenerator* codegen) {
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver) {
if (invoke->IsStringInit()) {
// Not using the dex cache arrays. But we could still try to use a better dispatch...
// TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -108,6 +123,10 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
method_load_data = reinterpret_cast<uintptr_t>(callee);
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ } else if (codegen->GetCompilerOptions().IsBootImage() &&
+ BootImageAOTCanEmbedMethod(callee, compiler_driver)) {
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
+ code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
} else {
// Use PC-relative access to the dex cache arrays.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
@@ -140,7 +159,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
CompilerDriver* compiler_driver,
const DexCompilationUnit& dex_compilation_unit) {
Handle<mirror::Class> klass = load_class->GetClass();
- DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
+ DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall ||
load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
<< load_class->GetLoadKind();
DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
@@ -166,13 +185,11 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
DCHECK(!runtime->UseJitCompilation());
if (!compiler_driver->GetSupportBootImageFixup()) {
// compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
- } else if ((klass != nullptr) && compiler_driver->IsImageClass(
- dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+ desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
+ } else if ((klass != nullptr) &&
+ compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) {
is_in_boot_image = true;
- desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
- ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
- : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+ desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
} else {
// Not a boot image class.
DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
@@ -182,8 +199,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
is_in_boot_image = (klass != nullptr) &&
runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get());
if (runtime->UseJitCompilation()) {
- // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
- // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+ DCHECK(!codegen->GetCompilerOptions().GetCompilePic());
if (is_in_boot_image) {
// TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
@@ -194,7 +210,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
// this `HLoadClass` hasn't been executed in the interpreter.
// Fallback to the dex cache.
// TODO(ngeoffray): Generate HDeoptimize instead.
- desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
}
} else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
// AOT app compilation. Check if the class is in the boot image.
@@ -213,7 +229,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
}
if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) {
- if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) ||
+ if ((load_kind == HLoadClass::LoadKind::kRuntimeCall) ||
(load_kind == HLoadClass::LoadKind::kBssEntry)) {
// We actually cannot reference this class, we're forced to bail.
// We cannot reference this class with Bss, as the entrypoint will lookup the class
@@ -225,7 +241,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
}
void HSharpening::ProcessLoadString(HLoadString* load_string) {
- DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+ DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
const DexFile& dex_file = load_string->GetDexFile();
dex::StringIndex string_index = load_string->GetStringIndex();
@@ -249,16 +265,13 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
CHECK(string != nullptr);
if (compiler_driver_->GetSupportBootImageFixup()) {
DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
- desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
- ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
- : HLoadString::LoadKind::kBootImageLinkTimeAddress;
+ desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
} else {
// compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+ desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
}
} else if (runtime->UseJitCompilation()) {
- // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
- // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+ DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr) {
if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
@@ -267,7 +280,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
}
} else {
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+ desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
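Condensing the new invoke branch above: under boot-image AOT compilation a callee is given the PC-relative boot-image load kind only when both checks below pass. The helper name is hypothetical; the predicate simply mirrors the diff and BootImageAOTCanEmbedMethod.

// Hypothetical condensation; the real SharpenInvokeStaticOrDirect also handles
// string-init, JIT, and direct-address cases before reaching this branch.
bool UsesBootImagePcRelativeLoad(ArtMethod* callee,
                                 CodeGenerator* codegen,
                                 CompilerDriver* compiler_driver) {
  return codegen->GetCompilerOptions().IsBootImage() &&
         BootImageAOTCanEmbedMethod(callee, compiler_driver);
}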
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 10707c796f..f74b0afdbf 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -55,7 +55,9 @@ class HSharpening : public HOptimization {
REQUIRES_SHARED(Locks::mutator_lock_);
// Used by Sharpening and InstructionSimplifier.
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+ static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver);
private:
void ProcessLoadString(HLoadString* load_string);
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 0ed8a35338..0f24e81be2 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -652,6 +652,9 @@ class ArmAssembler : public Assembler {
virtual void blx(Register rm, Condition cond = AL) = 0;
virtual void bx(Register rm, Condition cond = AL) = 0;
+ // ADR instruction loading register for branching to the label.
+ virtual void AdrCode(Register rt, Label* label) = 0;
+
// Memory barriers.
virtual void dmb(DmbOptions flavor) = 0;
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 1e71d06b49..d7096b3c87 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -214,14 +214,14 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) {
DCHECK_GE(dest_end, src_end);
for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) {
Fixup* fixup = &*i;
+ size_t old_fixup_location = fixup->GetLocation();
if (fixup->GetOriginalSize() == fixup->GetSize()) {
// The size of this Fixup didn't change. To avoid moving the data
// in small chunks, emit the code to its original position.
- fixup->Emit(&buffer_, adjusted_code_size);
fixup->Finalize(dest_end - src_end);
+ fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size);
} else {
// Move the data between the end of the fixup and src_end to its final location.
- size_t old_fixup_location = fixup->GetLocation();
size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes();
size_t data_size = src_end - src_begin;
size_t dest_begin = dest_end - data_size;
@@ -230,7 +230,7 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) {
dest_end = dest_begin - fixup->GetSizeInBytes();
// Finalize the Fixup and emit the data to the new location.
fixup->Finalize(dest_end - src_end);
- fixup->Emit(&buffer_, adjusted_code_size);
+ fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size);
}
}
CHECK_EQ(src_end, dest_end);
@@ -1895,6 +1895,9 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) {
case kCbxz48Bit:
return 6u;
+ case kCodeAddr4KiB:
+ return 4u;
+
case kLiteral1KiB:
return 2u;
case kLiteral4KiB:
@@ -1973,6 +1976,15 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con
diff -= 2; // Extra CMP Rn, #0, 16-bit.
break;
+ case kCodeAddr4KiB:
+ // The ADR instruction rounds down the PC+4 to a multiple of 4, so if the PC
+ // isn't a multiple of 4, we need to adjust.
+ DCHECK_ALIGNED(diff, 2);
+ diff += location_ & 2;
+ // Add the Thumb mode bit.
+ diff += 1;
+ break;
+
case kLiteral1KiB:
case kLiteral4KiB:
case kLongOrFPLiteral1KiB:
@@ -1987,8 +1999,8 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con
diff = diff + (diff & 2);
DCHECK_GE(diff, 0);
break;
- case kLiteral1MiB:
case kLiteral64KiB:
+ case kLiteral1MiB:
case kLongOrFPLiteral64KiB:
case kLiteralAddr64KiB:
DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC.
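A worked example of the kCodeAddr4KiB arithmetic above, with hypothetical numbers chosen only for illustration: an ADR fixup at byte 6 whose label binds at byte 0x40.

#include <cstdint>

constexpr uint32_t kLocation = 6u;     // address of the ADR
constexpr uint32_t kTarget = 0x40u;    // address the bound label resolves to
constexpr int32_t kRawDiff = static_cast<int32_t>(kTarget - (kLocation + 4u));  // 0x36: target minus Thumb PC
constexpr int32_t kAligned = kRawDiff + static_cast<int32_t>(kLocation & 2u);   // 0x38: ADR reads AlignDown(PC, 4)
constexpr int32_t kEncoded = kAligned + 1;                                      // 0x39: add the Thumb mode bit
// The register receives AlignDown(6 + 4, 4) + 0x39 == 0x41, i.e. 0x40 | 1.
static_assert(kEncoded == 0x39, "kCodeAddr4KiB offset example");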
@@ -2041,6 +2053,10 @@ bool Thumb2Assembler::Fixup::IsCandidateForEmitEarly() const {
// We don't support conditional branches beyond +-1MiB.
return true;
+ case kCodeAddr4KiB:
+ // ADR uses the aligned PC and as such the offset cannot be calculated early.
+ return false;
+
case kLiteral1KiB:
case kLiteral4KiB:
case kLiteral64KiB:
@@ -2087,6 +2103,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size)
// We don't support conditional branches beyond +-1MiB.
break;
+ case kCodeAddr4KiB:
+ // We don't support code address ADR beyond +4KiB.
+ break;
+
case kLiteral1KiB:
DCHECK(!IsHighRegister(rn_));
if (IsUint<10>(GetOffset(current_code_size))) {
@@ -2159,13 +2179,15 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size)
return current_code_size - old_code_size;
}
-void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const {
+void Thumb2Assembler::Fixup::Emit(uint32_t emit_location,
+ AssemblerBuffer* buffer,
+ uint32_t code_size) const {
switch (GetSize()) {
case kBranch16Bit: {
DCHECK(type_ == kUnconditional || type_ == kConditional);
DCHECK_EQ(type_ == kConditional, cond_ != AL);
int16_t encoding = BEncoding16(GetOffset(code_size), cond_);
- buffer->Store<int16_t>(location_, encoding);
+ buffer->Store<int16_t>(emit_location, encoding);
break;
}
case kBranch32Bit: {
@@ -2180,15 +2202,15 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
DCHECK_NE(encoding & B12, 0);
encoding ^= B14 | B12;
}
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
break;
}
case kCbxz16Bit: {
DCHECK(type_ == kCompareAndBranchXZero);
int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_);
- buffer->Store<int16_t>(location_, encoding);
+ buffer->Store<int16_t>(emit_location, encoding);
break;
}
case kCbxz32Bit: {
@@ -2196,8 +2218,8 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
DCHECK(cond_ == EQ || cond_ == NE);
int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_);
- buffer->Store<int16_t>(location_, cmp_encoding);
- buffer->Store<int16_t>(location_ + 2, b_encoding);
+ buffer->Store<int16_t>(emit_location, cmp_encoding);
+ buffer->Store<int16_t>(emit_location + 2, b_encoding);
break;
}
case kCbxz48Bit: {
@@ -2205,24 +2227,32 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
DCHECK(cond_ == EQ || cond_ == NE);
int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_);
- buffer->Store<int16_t>(location_, cmp_encoding);
- buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16);
- buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, cmp_encoding);
+ buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff));
+ break;
+ }
+
+ case kCodeAddr4KiB: {
+ DCHECK(type_ == kLoadCodeAddr);
+ int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+ buffer->Store<int16_t>(emit_location, encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
break;
}
case kLiteral1KiB: {
DCHECK(type_ == kLoadLiteralNarrow);
int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size));
- buffer->Store<int16_t>(location_, encoding);
+ buffer->Store<int16_t>(emit_location, encoding);
break;
}
case kLiteral4KiB: {
DCHECK(type_ == kLoadLiteralNarrow);
// GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly.
int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size));
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
break;
}
case kLiteral64KiB: {
@@ -2242,11 +2272,11 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff);
int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff);
- buffer->Store<int16_t>(location_, mov_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
+ buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
break;
}
case kLiteralFar: {
@@ -2256,36 +2286,36 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0);
- buffer->Store<int16_t>(location_, movw_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
- buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
+ buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
break;
}
case kLiteralAddr1KiB: {
DCHECK(type_ == kLoadLiteralAddr);
int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
- buffer->Store<int16_t>(location_, encoding);
+ buffer->Store<int16_t>(emit_location, encoding);
break;
}
case kLiteralAddr4KiB: {
DCHECK(type_ == kLoadLiteralAddr);
int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
break;
}
case kLiteralAddr64KiB: {
DCHECK(type_ == kLoadLiteralAddr);
int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
- buffer->Store<int16_t>(location_, mov_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+ buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
break;
}
case kLiteralAddrFar: {
@@ -2294,29 +2324,29 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
- buffer->Store<int16_t>(location_, movw_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
- buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+ buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
break;
}
case kLongOrFPLiteral1KiB: {
int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_.
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff));
break;
}
case kLongOrFPLiteral64KiB: {
int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_.
- buffer->Store<int16_t>(location_, mov_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, mov_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding);
+ buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
break;
}
case kLongOrFPLiteralFar: {
@@ -2325,13 +2355,13 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c
int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff);
int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_.
- buffer->Store<int16_t>(location_, movw_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
- buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location, movw_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+ buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding);
+ buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16);
+ buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
break;
}
}
@@ -3331,6 +3361,19 @@ void Thumb2Assembler::bx(Register rm, Condition cond) {
}
+void Thumb2Assembler::AdrCode(Register rt, Label* label) {
+ uint32_t pc = buffer_.Size();
+ FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt));
+ CHECK(!label->IsBound());
+ // ADR target must be an unbound label. Add it to a singly-linked list maintained within
+ // the code with the label serving as the head.
+ Emit16(static_cast<uint16_t>(label->position_));
+ label->LinkTo(branch_id);
+ Emit16(0);
+ DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes());
+}
+
+
void Thumb2Assembler::Push(Register rd, Condition cond) {
str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
}
@@ -3405,7 +3448,7 @@ void Thumb2Assembler::Bind(Label* label) {
break;
}
}
- last_fixup.Emit(&buffer_, buffer_.Size());
+ last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size());
fixups_.pop_back();
}
}
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 1c495aa7a7..2ff9018510 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -268,6 +268,9 @@ class Thumb2Assembler FINAL : public ArmAssembler {
void blx(Register rm, Condition cond = AL) OVERRIDE;
void bx(Register rm, Condition cond = AL) OVERRIDE;
+ // ADR instruction loading register for branching to the label, including the Thumb mode bit.
+ void AdrCode(Register rt, Label* label) OVERRIDE;
+
virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
@@ -377,6 +380,10 @@ class Thumb2Assembler FINAL : public ArmAssembler {
force_32bit_ = true;
}
+ void Allow16Bit() {
+ force_32bit_ = false;
+ }
+
// Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
// will generate a fixup.
JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
@@ -422,6 +429,7 @@ class Thumb2Assembler FINAL : public ArmAssembler {
kUnconditionalLink, // BL.
kUnconditionalLinkX, // BLX.
kCompareAndBranchXZero, // cbz/cbnz.
+ kLoadCodeAddr, // Get address of a code label, used for Baker read barriers.
kLoadLiteralNarrow, // Load narrow integer literal.
kLoadLiteralWide, // Load wide integer literal.
kLoadLiteralAddr, // Load address of literal (used for jump table).
@@ -442,6 +450,10 @@ class Thumb2Assembler FINAL : public ArmAssembler {
kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
+ // ADR variants.
+ kCodeAddr4KiB, // ADR rX, <label>; label must be after the ADR but within 4KiB range.
+ // Multi-instruction expansion is not supported.
+
// Load integer literal variants.
// LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
kLiteral1KiB,
@@ -492,6 +504,12 @@ class Thumb2Assembler FINAL : public ArmAssembler {
cond, kCompareAndBranchXZero, kCbxz16Bit, location);
}
+ // Code address.
+ static Fixup LoadCodeAddress(uint32_t location, Register rt) {
+ return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+ AL, kLoadCodeAddr, kCodeAddr4KiB, location);
+ }
+
// Load narrow literal.
static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
@@ -550,6 +568,7 @@ class Thumb2Assembler FINAL : public ArmAssembler {
switch (GetOriginalSize()) {
case kBranch32Bit:
case kCbxz48Bit:
+ case kCodeAddr4KiB:
case kLiteralFar:
case kLiteralAddrFar:
case kLongOrFPLiteralFar:
@@ -623,7 +642,7 @@ class Thumb2Assembler FINAL : public ArmAssembler {
// Emit the branch instruction into the assembler buffer. This does the
// encoding into the thumb instruction.
- void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;
+ void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const;
private:
Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
@@ -903,6 +922,26 @@ class Thumb2Assembler FINAL : public ArmAssembler {
FixupId last_fixup_id_;
};
+class ScopedForce32Bit {
+ public:
+ explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true)
+ : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
+ if (force) {
+ assembler->Force32Bit();
+ }
+ }
+
+ ~ScopedForce32Bit() {
+ if (!old_force_32bit_) {
+ assembler_->Allow16Bit();
+ }
+ }
+
+ private:
+ Thumb2Assembler* const assembler_;
+ const bool old_force_32bit_;
+};
+
} // namespace arm
} // namespace art
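A possible caller of the new ScopedForce32Bit helper, shown only to illustrate the RAII contract; EmitFixedSizeSequence is a hypothetical name, not part of this change.

void EmitFixedSizeSequence(Thumb2Assembler* assembler) {
  // Force wide encodings so that a later patcher finds instructions at fixed offsets.
  ScopedForce32Bit force_32bit(assembler);
  // ... emit the instructions that must keep 32-bit encodings ...
}  // Destructor re-allows 16-bit encodings if they were allowed on entry.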
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index f8c4008b45..eaaf81518a 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5535,7 +5535,7 @@ const char* const VixlJniHelpersResults[] = {
" f0: f1bc 0f00 cmp.w ip, #0\n",
" f4: bf18 it ne\n",
" f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n",
- " fa: f8d9 c084 ldr.w ip, [r9, #132] ; 0x84\n",
+ " fa: f8d9 c08c ldr.w ip, [r9, #140] ; 0x8c\n",
" fe: f1bc 0f00 cmp.w ip, #0\n",
" 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n",
" 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
@@ -5610,7 +5610,7 @@ const char* const VixlJniHelpersResults[] = {
" 214: ecbd 8a10 vpop {s16-s31}\n",
" 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
" 21c: 4660 mov r0, ip\n",
- " 21e: f8d9 c2b8 ldr.w ip, [r9, #696] ; 0x2b8\n",
+ " 21e: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n",
" 222: 47e0 blx ip\n",
nullptr
};
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 4c0979e0b7..b390508ed4 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -23,7 +23,7 @@
#include "base/array_ref.h"
#include "dedupe_set-inl.h"
#include "gtest/gtest.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
namespace art {
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 0f82ad5ff1..4c6ae8e218 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -29,24 +29,24 @@ class AssemblerFixup;
namespace arm {
class ArmAssembler;
class Thumb2Assembler;
-}
+} // namespace arm
namespace arm64 {
class Arm64Assembler;
-}
+} // namespace arm64
namespace mips {
class MipsAssembler;
-}
+} // namespace mips
namespace mips64 {
class Mips64Assembler;
-}
+} // namespace mips64
namespace x86 {
class X86Assembler;
class NearLabel;
-}
+} // namespace x86
namespace x86_64 {
class X86_64Assembler;
class NearLabel;
-}
+} // namespace x86_64
class ExternalLabel {
public:
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 184cdf5050..2b7b2aa7ce 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -26,24 +26,24 @@ namespace art {
namespace arm {
class ArmManagedRegister;
-}
+} // namespace arm
namespace arm64 {
class Arm64ManagedRegister;
-}
+} // namespace arm64
namespace mips {
class MipsManagedRegister;
-}
+} // namespace mips
namespace mips64 {
class Mips64ManagedRegister;
-}
+} // namespace mips64
namespace x86 {
class X86ManagedRegister;
-}
+} // namespace x86
namespace x86_64 {
class X86_64ManagedRegister;
-}
+} // namespace x86_64
class ManagedRegister : public ValueObject {
public:
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 57223b52a3..b8b800abe3 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1356,6 +1356,106 @@ void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegiste
EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12);
}
+void Mips64Assembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10);
+}
+
void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
CHECK(HasMsa());
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
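A note on the add_a/ave_s/ave_u/aver_s/aver_u emitters above: they all go through EmitMsa3R(), where the first argument together with the trailing minor opcode (0x0-0x7 and 0x10 here) selects the instruction and `df` selects the data format (.b/.h/.w/.d). The field layout below is the generic MSA 3R format from the architecture manual, included for orientation rather than taken from this diff:

    | 31..26 | 25..23    | 22..21 | 20..16 | 15..11 | 10..6 | 5..0         |
    | 011110 | operation | df     | wt     | ws     | wd    | minor_opcode |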
@@ -1675,6 +1775,37 @@ void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) {
EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
}
+void Mips64Assembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
+ EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst,
+ FpuRegister src,
+ bool is_double) {
+ // Float or double in FPU register Fx can be considered as the 0th element in vector register Wx.
+ if (is_double) {
+ SplatiD(dst, static_cast<VectorRegister>(src), 0);
+ } else {
+ SplatiW(dst, static_cast<VectorRegister>(src), 0);
+ }
+}
+
void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
TemplateLoadConst32(this, rd, value);
}
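ReplicateFPToVectorRegister() relies on the FPU/MSA register overlap noted in its comment: Fx occupies the low 64 bits of Wx, so the scalar already sits in lane 0 of the corresponding vector register and a single splati broadcasts it. A minimal sketch of how a test could exercise it, following the AssemblerMIPS64Test conventions used later in this patch (the `__` assembler macro, DriverStr); the register choice and expected strings are assumptions derived from the SplatiD/SplatiW calls above, not part of the change:

    __ ReplicateFPToVectorRegister(mips64::W0, mips64::F4, /* is_double */ true);
    __ ReplicateFPToVectorRegister(mips64::W0, mips64::F4, /* is_double */ false);
    const char* expected =
        "splati.d $w0, $w4[0]\n"
        "splati.w $w0, $w4[0]\n";
    DriverStr(expected, "ReplicateFPToVectorRegister");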
@@ -2702,6 +2833,94 @@ void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base,
CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1));
}
+void Mips64Assembler::AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
+ int32_t& offset,
+ int& element_size_shift) {
+ // This method is used to adjust the base register, offset and element_size_shift
+ // for a vector load/store when the offset doesn't fit into the allowed number of bits.
+ // MSA ld.df and st.df instructions take signed offsets as arguments, but maximum
+ // offset is dependant on the size of the data format df (10-bit offsets for ld.b,
+ // 11-bit for ld.h, 12-bit for ld.w and 13-bit for ld.d).
+ // If element_size_shift is non-negative at entry, it won't be changed, but offset
+ // will be checked for appropriate alignment. If negative at entry, it will be
+ // adjusted based on offset for maximum fit.
+ // It's assumed that `base` is a multiple of 8.
+
+ CHECK_NE(base, AT); // Must not overwrite the register `base` while loading `offset`.
+
+ if (element_size_shift >= 0) {
+ CHECK_LE(element_size_shift, TIMES_8);
+ CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift);
+ } else if (IsAligned<kMips64DoublewordSize>(offset)) {
+ element_size_shift = TIMES_8;
+ } else if (IsAligned<kMips64WordSize>(offset)) {
+ element_size_shift = TIMES_4;
+ } else if (IsAligned<kMips64HalfwordSize>(offset)) {
+ element_size_shift = TIMES_2;
+ } else {
+ element_size_shift = TIMES_1;
+ }
+
+ const int low_len = 10 + element_size_shift; // How many low bits of `offset` ld.df/st.df
+ // will take.
+ int16_t low = offset & ((1 << low_len) - 1); // Isolate these bits.
+ low -= (low & (1 << (low_len - 1))) << 1; // Sign-extend these bits.
+ if (low == offset) {
+ return; // `offset` fits into ld.df/st.df.
+ }
+
+ // First, see if `offset` can be represented as a sum of two signed offsets.
+ // This can save an instruction.
+
+ // Max int16_t that's a multiple of element size.
+ const int32_t kMaxDeltaForSimpleAdjustment = 0x8000 - (1 << element_size_shift);
+ // Max ld.df/st.df offset that's a multiple of element size.
+ const int32_t kMaxLoadStoreOffset = 0x1ff << element_size_shift;
+ const int32_t kMaxOffsetForSimpleAdjustment = kMaxDeltaForSimpleAdjustment + kMaxLoadStoreOffset;
+
+ if (IsInt<16>(offset)) {
+ Daddiu(AT, base, offset);
+ offset = 0;
+ } else if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) {
+ Daddiu(AT, base, kMaxDeltaForSimpleAdjustment);
+ offset -= kMaxDeltaForSimpleAdjustment;
+ } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) {
+ Daddiu(AT, base, -kMaxDeltaForSimpleAdjustment);
+ offset += kMaxDeltaForSimpleAdjustment;
+ } else {
+ // Let's treat `offset` as 64-bit to simplify handling of sign
+ // extensions in the instructions that supply its smaller signed parts.
+ //
+ // 16-bit or smaller parts of `offset`:
+ // |63 top 48|47 hi 32|31 upper 16|15 mid 13-10|12-9 low 0|
+ //
+ // Instructions that supply each part as a signed integer addend:
+ // |dati |dahi |daui |daddiu |ld.df/st.df |
+ //
+ // `top` is always 0, so dati isn't used.
+ // `hi` is 1 when `offset` is close to +2GB and 0 otherwise.
+ uint64_t tmp = static_cast<uint64_t>(offset) - low; // Exclude `low` from the rest of `offset`
+ // (accounts for sign of `low`).
+ tmp += (tmp & (UINT64_C(1) << 15)) << 1; // Account for sign extension in daddiu.
+ tmp += (tmp & (UINT64_C(1) << 31)) << 1; // Account for sign extension in daui.
+ int16_t mid = Low16Bits(tmp);
+ int16_t upper = High16Bits(tmp);
+ int16_t hi = Low16Bits(High32Bits(tmp));
+ Daui(AT, base, upper);
+ if (hi != 0) {
+ CHECK_EQ(hi, 1);
+ Dahi(AT, hi);
+ }
+ if (mid != 0) {
+ Daddiu(AT, AT, mid);
+ }
+ offset = low;
+ }
+ base = AT;
+ CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift);
+ CHECK(IsInt<10>(offset >> element_size_shift));
+}
+
void Mips64Assembler::LoadFromOffset(LoadOperandType type,
GpuRegister reg,
GpuRegister base,
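The long-offset decomposition in the final `else` branch can be traced with one of the offsets exercised by the LoadFpuFromOffset test later in this patch. For offset 0x7FFFABCD, which is only byte-aligned (element_size_shift = 0, low_len = 10):

    low   = sign-extended low 10 bits             = -0x33       (the ld.b displacement, printed as -51)
    tmp   = offset - low                          = 0x7FFFAC00
    tmp  += 0x10000      (daddiu sign extension)  = 0x8000AC00
    tmp  += 0x100000000  (daui sign extension)    = 0x18000AC00
    mid   = bits 15..0  of tmp = 0xAC00 (-21504)  -> daddiu immediate
    upper = bits 31..16 of tmp = 0x8000           -> daui immediate
    hi    = bits 47..32 of tmp = 0x1              -> dahi immediate

which reproduces exactly the "daui/dahi/daddiu/ld.b $w0, -51($at)" sequence expected by the test below.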
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 666c6935a1..9b4064543f 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -278,14 +278,16 @@ enum LoadOperandType {
kLoadUnsignedHalfword,
kLoadWord,
kLoadUnsignedWord,
- kLoadDoubleword
+ kLoadDoubleword,
+ kLoadQuadword
};
enum StoreOperandType {
kStoreByte,
kStoreHalfword,
kStoreWord,
- kStoreDoubleword
+ kStoreDoubleword,
+ kStoreQuadword
};
// Used to test the values returned by ClassS/ClassD.
@@ -682,6 +684,26 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
@@ -747,6 +769,14 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void StW(VectorRegister wd, GpuRegister rs, int offset);
void StD(VectorRegister wd, GpuRegister rs, int offset);
+ void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+ // Helper for replicating a floating point value in all destination elements.
+ void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);
+
// Higher level composite instructions.
int InstrCountForLoadReplicatedConst32(int64_t);
void LoadConst32(GpuRegister rd, int32_t value);
@@ -876,6 +906,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
+ // If element_size_shift is negative at entry, its value will be calculated based on the offset.
+ void AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
+ int32_t& offset,
+ int& element_size_shift);
private:
// This will be used as an argument for loads/stores
@@ -999,6 +1033,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
null_checker();
}
break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
}
if (type != kLoadDoubleword) {
null_checker();
@@ -1011,7 +1047,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
GpuRegister base,
int32_t offset,
ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
- AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+ int element_size_shift = -1;
+ if (type != kLoadQuadword) {
+ AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+ } else {
+ AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
+ }
switch (type) {
case kLoadWord:
@@ -1031,6 +1072,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
null_checker();
}
break;
+ case kLoadQuadword:
+ switch (element_size_shift) {
+ case TIMES_1: LdB(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_2: LdH(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_4: LdW(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_8: LdD(static_cast<VectorRegister>(reg), base, offset); break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ null_checker();
+ break;
default:
LOG(FATAL) << "UNREACHABLE";
}
@@ -1084,7 +1136,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
GpuRegister base,
int32_t offset,
ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
- AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+ int element_size_shift = -1;
+ if (type != kStoreQuadword) {
+ AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+ } else {
+ AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
+ }
switch (type) {
case kStoreWord:
@@ -1104,6 +1161,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
null_checker();
}
break;
+ case kStoreQuadword:
+ switch (element_size_shift) {
+ case TIMES_1: StB(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_2: StH(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_4: StW(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_8: StD(static_cast<VectorRegister>(reg), base, offset); break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ null_checker();
+ break;
default:
LOG(FATAL) << "UNREACHABLE";
}
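With kLoadQuadword/kStoreQuadword the caller requests a full 128-bit MSA transfer and the assembler picks the widest ld.df/st.df that the offset's alignment permits (element_size_shift is passed in as -1 and computed by AdjustBaseOffsetAndElementSizeShift). A minimal usage sketch whose inputs and expected instructions are taken from the tests below:

    __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 8);   // ld.d $w0, 8($a0)
    __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4);   // ld.w $w0, 4($a0)
    __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2);   // st.h $w0, 2($a0)
    __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1);   // st.b $w0, 1($a0)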
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index f2e3b1610c..fbebe0ce15 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1970,6 +1970,50 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
__ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
__ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 8);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 511);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 512);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 513);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 514);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 516);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1022);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1024);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1025);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1026);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1028);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2044);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2048);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2049);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2050);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2052);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4088);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4096);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4097);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4098);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4100);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4104);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFC);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x8000);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x10000);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12345678);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12350078);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -256);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -511);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -513);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1022);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1026);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2044);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2052);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4096);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4104);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -32768);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFFABCD);
+
const char* expected =
"lwc1 $f0, 0($a0)\n"
"lwc1 $f0, 4($a0)\n"
@@ -2010,7 +2054,78 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
"ldc1 $f0, -256($a0)\n"
"ldc1 $f0, -32768($a0)\n"
"daui $at, $a0, 0xABCE\n"
- "ldc1 $f0, -0x1100($at) # 0xEF00\n";
+ "ldc1 $f0, -0x1100($at) # 0xEF00\n"
+
+ "ld.d $w0, 0($a0)\n"
+ "ld.b $w0, 1($a0)\n"
+ "ld.h $w0, 2($a0)\n"
+ "ld.w $w0, 4($a0)\n"
+ "ld.d $w0, 8($a0)\n"
+ "ld.b $w0, 511($a0)\n"
+ "ld.d $w0, 512($a0)\n"
+ "daddiu $at, $a0, 513\n"
+ "ld.b $w0, 0($at)\n"
+ "ld.h $w0, 514($a0)\n"
+ "ld.w $w0, 516($a0)\n"
+ "ld.h $w0, 1022($a0)\n"
+ "ld.d $w0, 1024($a0)\n"
+ "daddiu $at, $a0, 1025\n"
+ "ld.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 1026\n"
+ "ld.h $w0, 0($at)\n"
+ "ld.w $w0, 1028($a0)\n"
+ "ld.w $w0, 2044($a0)\n"
+ "ld.d $w0, 2048($a0)\n"
+ "daddiu $at, $a0, 2049\n"
+ "ld.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 2050\n"
+ "ld.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 2052\n"
+ "ld.w $w0, 0($at)\n"
+ "ld.d $w0, 4088($a0)\n"
+ "daddiu $at, $a0, 4096\n"
+ "ld.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 4097\n"
+ "ld.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 4098\n"
+ "ld.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 4100\n"
+ "ld.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 4104\n"
+ "ld.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FFC\n"
+ "ld.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FF8\n"
+ "ld.d $w0, 8($at)\n"
+ "daui $at, $a0, 0x1\n"
+ "ld.d $w0, 0($at)\n"
+ "daui $at, $a0, 0x1234\n"
+ "daddiu $at, $at, 0x6000\n"
+ "ld.d $w0, -2440($at) # 0xF678\n"
+ "daui $at, $a0, 0x1235\n"
+ "ld.d $w0, 0x78($at)\n"
+ "ld.d $w0, -256($a0)\n"
+ "ld.b $w0, -511($a0)\n"
+ "daddiu $at, $a0, -513\n"
+ "ld.b $w0, 0($at)\n"
+ "ld.h $w0, -1022($a0)\n"
+ "daddiu $at, $a0, -1026\n"
+ "ld.h $w0, 0($at)\n"
+ "ld.w $w0, -2044($a0)\n"
+ "daddiu $at, $a0, -2052\n"
+ "ld.w $w0, 0($at)\n"
+ "ld.d $w0, -4096($a0)\n"
+ "daddiu $at, $a0, -4104\n"
+ "ld.d $w0, 0($at)\n"
+ "daddiu $at, $a0, -32768\n"
+ "ld.d $w0, 0($at)\n"
+ "daui $at, $a0, 0xABCE\n"
+ "daddiu $at, $at, -8192 # 0xE000\n"
+ "ld.d $w0, 0xF00($at)\n"
+ "daui $at, $a0, 0x8000\n"
+ "dahi $at, $at, 1\n"
+ "daddiu $at, $at, -21504 # 0xAC00\n"
+ "ld.b $w0, -51($at) # 0xFFCD\n";
DriverStr(expected, "LoadFpuFromOffset");
}
@@ -2200,6 +2315,50 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
__ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
__ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 8);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 511);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 512);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 513);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 514);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 516);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1022);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1024);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1025);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1026);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1028);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2044);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2048);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2049);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2050);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2052);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4088);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4096);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4097);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4098);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4100);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4104);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFC);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x8000);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x10000);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12345678);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12350078);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -256);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -511);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -513);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1022);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1026);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2044);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2052);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4096);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4104);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -32768);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFFABCD);
+
const char* expected =
"swc1 $f0, 0($a0)\n"
"swc1 $f0, 4($a0)\n"
@@ -2240,7 +2399,78 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
"sdc1 $f0, -256($a0)\n"
"sdc1 $f0, -32768($a0)\n"
"daui $at, $a0, 0xABCE\n"
- "sdc1 $f0, -0x1100($at)\n";
+ "sdc1 $f0, -0x1100($at)\n"
+
+ "st.d $w0, 0($a0)\n"
+ "st.b $w0, 1($a0)\n"
+ "st.h $w0, 2($a0)\n"
+ "st.w $w0, 4($a0)\n"
+ "st.d $w0, 8($a0)\n"
+ "st.b $w0, 511($a0)\n"
+ "st.d $w0, 512($a0)\n"
+ "daddiu $at, $a0, 513\n"
+ "st.b $w0, 0($at)\n"
+ "st.h $w0, 514($a0)\n"
+ "st.w $w0, 516($a0)\n"
+ "st.h $w0, 1022($a0)\n"
+ "st.d $w0, 1024($a0)\n"
+ "daddiu $at, $a0, 1025\n"
+ "st.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 1026\n"
+ "st.h $w0, 0($at)\n"
+ "st.w $w0, 1028($a0)\n"
+ "st.w $w0, 2044($a0)\n"
+ "st.d $w0, 2048($a0)\n"
+ "daddiu $at, $a0, 2049\n"
+ "st.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 2050\n"
+ "st.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 2052\n"
+ "st.w $w0, 0($at)\n"
+ "st.d $w0, 4088($a0)\n"
+ "daddiu $at, $a0, 4096\n"
+ "st.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 4097\n"
+ "st.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 4098\n"
+ "st.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 4100\n"
+ "st.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 4104\n"
+ "st.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FFC\n"
+ "st.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FF8\n"
+ "st.d $w0, 8($at)\n"
+ "daui $at, $a0, 0x1\n"
+ "st.d $w0, 0($at)\n"
+ "daui $at, $a0, 0x1234\n"
+ "daddiu $at, $at, 0x6000\n"
+ "st.d $w0, -2440($at) # 0xF678\n"
+ "daui $at, $a0, 0x1235\n"
+ "st.d $w0, 0x78($at)\n"
+ "st.d $w0, -256($a0)\n"
+ "st.b $w0, -511($a0)\n"
+ "daddiu $at, $a0, -513\n"
+ "st.b $w0, 0($at)\n"
+ "st.h $w0, -1022($a0)\n"
+ "daddiu $at, $a0, -1026\n"
+ "st.h $w0, 0($at)\n"
+ "st.w $w0, -2044($a0)\n"
+ "daddiu $at, $a0, -2052\n"
+ "st.w $w0, 0($at)\n"
+ "st.d $w0, -4096($a0)\n"
+ "daddiu $at, $a0, -4104\n"
+ "st.d $w0, 0($at)\n"
+ "daddiu $at, $a0, -32768\n"
+ "st.d $w0, 0($at)\n"
+ "daui $at, $a0, 0xABCE\n"
+ "daddiu $at, $at, -8192 # 0xE000\n"
+ "st.d $w0, 0xF00($at)\n"
+ "daui $at, $a0, 0x8000\n"
+ "dahi $at, $at, 1\n"
+ "daddiu $at, $at, -21504 # 0xAC00\n"
+ "st.b $w0, -51($at) # 0xFFCD\n";
DriverStr(expected, "StoreFpuToOffset");
}
@@ -2668,6 +2898,106 @@ TEST_F(AssemblerMIPS64Test, Mod_uD) {
"mod_u.d");
}
+TEST_F(AssemblerMIPS64Test, Add_aB) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"),
+ "add_a.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aH) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"),
+ "add_a.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aW) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"),
+ "add_a.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aD) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"),
+ "add_a.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sB) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"),
+ "ave_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sH) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"),
+ "ave_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sW) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"),
+ "ave_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sD) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"),
+ "ave_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uB) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"),
+ "ave_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uH) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"),
+ "ave_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uW) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"),
+ "ave_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uD) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"),
+ "ave_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sB) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"),
+ "aver_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sH) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"),
+ "aver_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sW) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"),
+ "aver_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sD) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"),
+ "aver_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uB) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"),
+ "aver_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uH) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"),
+ "aver_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uW) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"),
+ "aver_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uD) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"),
+ "aver_u.d");
+}
+
TEST_F(AssemblerMIPS64Test, FaddW) {
DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"),
"fadd.w");
@@ -2890,6 +3220,26 @@ TEST_F(AssemblerMIPS64Test, StD) {
"st.d");
}
+TEST_F(AssemblerMIPS64Test, IlvrB) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"),
+ "ilvr.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvrH) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrH, "ilvr.h ${reg1}, ${reg2}, ${reg3}"),
+ "ilvr.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvrW) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrW, "ilvr.w ${reg1}, ${reg2}, ${reg3}"),
+ "ilvr.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvrD) {
+ DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"),
+ "ilvr.d");
+}
+
#undef __
} // namespace art
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index a1eb08e041..4f6c915142 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -23,7 +23,7 @@
#include "base/logging.h"
#include "base/macros.h"
#include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
namespace art {
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index c286b820fe..0ff9fc69ed 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -78,7 +78,7 @@ class SwapSpace {
mutable FreeByStartSet::const_iterator free_by_start_entry;
};
struct FreeBySizeComparator {
- bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) {
+ bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const {
if (lhs.size != rhs.size) {
return lhs.size < rhs.size;
} else {
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
deleted file mode 100644
index a0fa1a4a63..0000000000
--- a/compiler/utils/type_reference.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_
-#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_
-
-#include <stdint.h>
-
-#include "base/logging.h"
-#include "dex_file_types.h"
-#include "string_reference.h"
-
-namespace art {
-
-class DexFile;
-
-// A type is located by its DexFile and the string_ids_ table index into that DexFile.
-struct TypeReference {
- TypeReference(const DexFile* file, dex::TypeIndex index) : dex_file(file), type_index(index) { }
-
- const DexFile* dex_file;
- dex::TypeIndex type_index;
-};
-
-// Compare the actual referenced type names. Used for type reference deduplication.
-struct TypeReferenceValueComparator {
- bool operator()(TypeReference tr1, TypeReference tr2) const {
- // Note that we want to deduplicate identical boot image types even if they are
- // referenced by different dex files, so we simply compare the descriptors.
- StringReference sr1(tr1.dex_file, tr1.dex_file->GetTypeId(tr1.type_index).descriptor_idx_);
- StringReference sr2(tr2.dex_file, tr2.dex_file->GetTypeId(tr2.type_index).descriptor_idx_);
- return StringReferenceValueComparator()(sr1, sr2);
- }
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_TYPE_REFERENCE_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 1736618363..bef32f8254 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1238,6 +1238,139 @@ void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x38);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3C);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEA);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEE);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x39);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDA);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDE);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3A);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3E);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3B);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3F);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst, src);
+}
void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
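For reference, the new packed min/max instructions span two ISA levels: the three-byte 0x0F 0x38 opcodes emitted above (pminsb/pmaxsb, pminsd/pmaxsd, pminuw/pmaxuw, pminud/pmaxud) are SSE4.1, while pminsw/pmaxsw and pminub/pmaxub (0x66 0x0F 0xEA/0xEE/0xDA/0xDE) plus minps/maxps/minpd/maxpd (0x0F 0x5D/0x5F, with the 0x66 prefix on the packed-double forms) are plain SSE/SSE2. As a spot check of the byte sequence these emitters produce, `pminsb %xmm1, %xmm0` should assemble to 66 0F 38 38 C1 (ModRM 0xC1 = mod 11, reg xmm0, r/m xmm1); this example is inferred from the code above rather than taken from the tests.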
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index a747cda7bd..c4bb9ee18a 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -498,6 +498,25 @@ class X86Assembler FINAL : public Assembler {
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
+ void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxsb(XmmRegister dst, XmmRegister src);
+ void pminsw(XmmRegister dst, XmmRegister src);
+ void pmaxsw(XmmRegister dst, XmmRegister src);
+ void pminsd(XmmRegister dst, XmmRegister src);
+ void pmaxsd(XmmRegister dst, XmmRegister src);
+
+ void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxub(XmmRegister dst, XmmRegister src);
+ void pminuw(XmmRegister dst, XmmRegister src);
+ void pmaxuw(XmmRegister dst, XmmRegister src);
+ void pminud(XmmRegister dst, XmmRegister src);
+ void pmaxud(XmmRegister dst, XmmRegister src);
+
+ void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void maxps(XmmRegister dst, XmmRegister src);
+ void minpd(XmmRegister dst, XmmRegister src);
+ void maxpd(XmmRegister dst, XmmRegister src);
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index f75f972265..34f2a47c27 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -613,6 +613,70 @@ TEST_F(AssemblerX86Test, PAvgW) {
DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
}
+TEST_F(AssemblerX86Test, PMinSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86Test, PMaxSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86Test, PMinSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86Test, PMaxSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86Test, PMinSD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86Test, PMaxSD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86Test, PMinUB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86Test, PMaxUB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86Test, PMinUW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86Test, PMaxUW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86Test, PMinUD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86Test, PMaxUD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86Test, MinPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86Test, MaxPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86Test, MinPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86Test, MaxPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
TEST_F(AssemblerX86Test, PCmpeqB) {
DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1b7a4850db..82d1174a25 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1445,6 +1445,156 @@ void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x38);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3C);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xEA);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xEE);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x39);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDA);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDE);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3A);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3E);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3B);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3F);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
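The x86_64 versions differ from the x86 ones only in the EmitOptionalRex32(dst, src) call, which adds a REX prefix whenever xmm8-xmm15 are involved. As an illustration inferred from the standard REX rules rather than from this diff, `pminsb %xmm15, %xmm8` should encode as 66 45 0F 38 38 C7, i.e. REX.RB followed by the same SSE4.1 opcode and a ModRM byte built from the registers' low bits.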
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0ddc46ca44..6e584fece1 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -526,6 +526,25 @@ class X86_64Assembler FINAL : public Assembler {
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
+ void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxsb(XmmRegister dst, XmmRegister src);
+ void pminsw(XmmRegister dst, XmmRegister src);
+ void pmaxsw(XmmRegister dst, XmmRegister src);
+ void pminsd(XmmRegister dst, XmmRegister src);
+ void pmaxsd(XmmRegister dst, XmmRegister src);
+
+ void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxub(XmmRegister dst, XmmRegister src);
+ void pminuw(XmmRegister dst, XmmRegister src);
+ void pmaxuw(XmmRegister dst, XmmRegister src);
+ void pminud(XmmRegister dst, XmmRegister src);
+ void pmaxud(XmmRegister dst, XmmRegister src);
+
+ void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void maxps(XmmRegister dst, XmmRegister src);
+ void minpd(XmmRegister dst, XmmRegister src);
+ void maxpd(XmmRegister dst, XmmRegister src);
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index e7d8401e29..b57400334c 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1301,6 +1301,70 @@ TEST_F(AssemblerX86_64Test, Pavgw) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
}
+TEST_F(AssemblerX86_64Test, Pminsb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pminub) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxub) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86_64Test, Pminuw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxuw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminud) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxud) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86_64Test, Minps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86_64Test, Maxps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86_64Test, Minpd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86_64Test, Maxpd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
TEST_F(AssemblerX86_64Test, PCmpeqb) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
}
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 4d55eb08b2..dd09fed06e 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -25,8 +25,8 @@
#include "dex/verified_method.h"
#include "dex_file.h"
#include "dex_file_types.h"
+#include "driver/compiler_driver-inl.h"
#include "driver/compiler_options.h"
-#include "driver/compiler_driver.h"
#include "handle_scope-inl.h"
#include "indenter.h"
#include "mirror/class_loader.h"