Diffstat (limited to 'compiler'): 142 files changed, 11959 insertions, 4225 deletions
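For orientation before the per-file hunks: the most widespread API change in this patch removes the absolute LinkerPatch::MethodPatch(), TypePatch() and StringPatch() factories, leaving only the PC-relative and .bss-entry forms; MethodPatch() is replaced by a new RelativeMethodPatch() that additionally records the offset of the PC-defining instruction. The sketch below is illustrative only and is not part of the patch: dex_file stands for some const DexFile*, and the offsets and method index are placeholder values. It assumes the LinkerPatch declarations from compiler/compiled_method.h as changed further down.

  // Before this change, an absolute method patch carried no PC anchor:
  //   LinkerPatch p = LinkerPatch::MethodPatch(/*literal_offset=*/ 16u, dex_file, /*method_idx=*/ 1000u);
  //
  // After this change, the relative factory also takes pc_insn_offset, so the
  // patch reports itself as PC-relative and PcInsnOffset() becomes meaningful:
  LinkerPatch p = LinkerPatch::RelativeMethodPatch(/*literal_offset=*/ 16u,
                                                   dex_file,
                                                   /*pc_insn_offset=*/ 12u,
                                                   /*method_idx=*/ 1000u);
  CHECK(p.IsPcRelative());
  CHECK_EQ(p.PcInsnOffset(), 12u);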
diff --git a/compiler/Android.bp b/compiler/Android.bp index 6ef866a3c6..307a42cbba 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -67,6 +67,7 @@ art_cc_defaults { "optimizing/intrinsics.cc", "optimizing/licm.cc", "optimizing/linear_order.cc", + "optimizing/load_store_analysis.cc", "optimizing/load_store_elimination.cc", "optimizing/locations.cc", "optimizing/loop_optimization.cc", @@ -115,6 +116,7 @@ art_cc_defaults { "optimizing/intrinsics_arm.cc", "optimizing/intrinsics_arm_vixl.cc", "optimizing/nodes_shared.cc", + "optimizing/scheduler_arm.cc", "utils/arm/assembler_arm.cc", "utils/arm/assembler_arm_vixl.cc", "utils/arm/assembler_thumb2.cc", @@ -338,6 +340,7 @@ art_cc_test { "elf_writer_test.cc", "exception_test.cc", "image_test.cc", + "image_write_read_test.cc", "jni/jni_compiler_test.cc", "linker/multi_oat_relative_patcher_test.cc", "linker/output_stream_test.cc", @@ -372,6 +375,7 @@ art_cc_test { "jni/jni_cfi_test.cc", "optimizing/codegen_test.cc", + "optimizing/load_store_analysis_test.cc", "optimizing/optimizing_cfi_test.cc", "optimizing/scheduler_test.cc", ], diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 39edd1eb02..a1ee68faeb 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -33,7 +33,7 @@ #include "mirror/object-inl.h" #include "oat_quick_method_header.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils.h" namespace art { diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index d0f66e2d8e..0ca23a5c50 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -119,24 +119,24 @@ class LinkerPatch { // choose to squeeze the Type into fewer than 8 bits, we'll have to declare // patch_type_ as an uintN_t and do explicit static_cast<>s. enum class Type : uint8_t { - kMethod, + kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. kCall, kCallRelative, // NOTE: Actual patching is instruction_set-dependent. - kType, kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kString, kStringRelative, // NOTE: Actual patching is instruction_set-dependent. kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. 
}; - static LinkerPatch MethodPatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t target_method_idx) { - LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file); + static LinkerPatch RelativeMethodPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t target_method_idx) { + LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file); patch.method_idx_ = target_method_idx; + patch.pc_insn_offset_ = pc_insn_offset; return patch; } @@ -156,14 +156,6 @@ class LinkerPatch { return patch; } - static LinkerPatch TypePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t target_type_idx) { - LinkerPatch patch(literal_offset, Type::kType, target_dex_file); - patch.type_idx_ = target_type_idx; - return patch; - } - static LinkerPatch RelativeTypePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -184,14 +176,6 @@ class LinkerPatch { return patch; } - static LinkerPatch StringPatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t target_string_idx) { - LinkerPatch patch(literal_offset, Type::kString, target_dex_file); - patch.string_idx_ = target_string_idx; - return patch; - } - static LinkerPatch RelativeStringPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -244,6 +228,7 @@ class LinkerPatch { bool IsPcRelative() const { switch (GetType()) { + case Type::kMethodRelative: case Type::kCallRelative: case Type::kTypeRelative: case Type::kTypeBssEntry: @@ -258,36 +243,32 @@ class LinkerPatch { } MethodReference TargetMethod() const { - DCHECK(patch_type_ == Type::kMethod || + DCHECK(patch_type_ == Type::kMethodRelative || patch_type_ == Type::kCall || patch_type_ == Type::kCallRelative); return MethodReference(target_dex_file_, method_idx_); } const DexFile* TargetTypeDexFile() const { - DCHECK(patch_type_ == Type::kType || - patch_type_ == Type::kTypeRelative || + DCHECK(patch_type_ == Type::kTypeRelative || patch_type_ == Type::kTypeBssEntry); return target_dex_file_; } dex::TypeIndex TargetTypeIndex() const { - DCHECK(patch_type_ == Type::kType || - patch_type_ == Type::kTypeRelative || + DCHECK(patch_type_ == Type::kTypeRelative || patch_type_ == Type::kTypeBssEntry); return dex::TypeIndex(type_idx_); } const DexFile* TargetStringDexFile() const { - DCHECK(patch_type_ == Type::kString || - patch_type_ == Type::kStringRelative || + DCHECK(patch_type_ == Type::kStringRelative || patch_type_ == Type::kStringBssEntry); return target_dex_file_; } dex::StringIndex TargetStringIndex() const { - DCHECK(patch_type_ == Type::kString || - patch_type_ == Type::kStringRelative || + DCHECK(patch_type_ == Type::kStringRelative || patch_type_ == Type::kStringBssEntry); return dex::StringIndex(string_idx_); } @@ -303,7 +284,8 @@ class LinkerPatch { } uint32_t PcInsnOffset() const { - DCHECK(patch_type_ == Type::kTypeRelative || + DCHECK(patch_type_ == Type::kMethodRelative || + patch_type_ == Type::kTypeRelative || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || patch_type_ == Type::kStringBssEntry || diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc index 99ee875da2..72b2282ade 100644 --- a/compiler/compiled_method_test.cc +++ b/compiler/compiled_method_test.cc @@ -50,10 +50,14 @@ TEST(CompiledMethod, LinkerPatchOperators) { const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1); const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2); 
LinkerPatch patches[] = { - LinkerPatch::MethodPatch(16u, dex_file1, 1000u), - LinkerPatch::MethodPatch(16u, dex_file1, 1001u), - LinkerPatch::MethodPatch(16u, dex_file2, 1000u), - LinkerPatch::MethodPatch(16u, dex_file2, 1001u), // Index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1001u), LinkerPatch::CodePatch(16u, dex_file1, 1000u), LinkerPatch::CodePatch(16u, dex_file1, 1001u), LinkerPatch::CodePatch(16u, dex_file2, 1000u), @@ -62,10 +66,38 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::RelativeCodePatch(16u, dex_file1, 1001u), LinkerPatch::RelativeCodePatch(16u, dex_file2, 1000u), LinkerPatch::RelativeCodePatch(16u, dex_file2, 1001u), - LinkerPatch::TypePatch(16u, dex_file1, 1000u), - LinkerPatch::TypePatch(16u, dex_file1, 1001u), - LinkerPatch::TypePatch(16u, dex_file2, 1000u), - LinkerPatch::TypePatch(16u, dex_file2, 1001u), + LinkerPatch::RelativeTypePatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeTypePatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeTypePatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeTypePatch(16u, dex_file1, 3001u, 1001u), + LinkerPatch::RelativeTypePatch(16u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeTypePatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeTypePatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeTypePatch(16u, dex_file2, 3001u, 1001u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file1, 3001u, 1001u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3000u, 1000u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::TypeBssEntryPatch(16u, dex_file2, 3001u, 1001u), + LinkerPatch::RelativeStringPatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeStringPatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeStringPatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeStringPatch(16u, dex_file1, 3001u, 1001u), + LinkerPatch::RelativeStringPatch(16u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeStringPatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeStringPatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeStringPatch(16u, dex_file2, 3001u, 1001u), + LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::StringBssEntryPatch(16u, dex_file1, 3001u, 1001u), + LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1000u), + LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1001u), LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u), LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u), 
LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u), @@ -74,10 +106,19 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u), LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u), LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u), - LinkerPatch::MethodPatch(32u, dex_file1, 1000u), - LinkerPatch::MethodPatch(32u, dex_file1, 1001u), - LinkerPatch::MethodPatch(32u, dex_file2, 1000u), - LinkerPatch::MethodPatch(32u, dex_file2, 1001u), + LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 0u), + LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 1u), + LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u), + LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u), + + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1001u), LinkerPatch::CodePatch(32u, dex_file1, 1000u), LinkerPatch::CodePatch(32u, dex_file1, 1001u), LinkerPatch::CodePatch(32u, dex_file2, 1000u), @@ -86,10 +127,38 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::RelativeCodePatch(32u, dex_file1, 1001u), LinkerPatch::RelativeCodePatch(32u, dex_file2, 1000u), LinkerPatch::RelativeCodePatch(32u, dex_file2, 1001u), - LinkerPatch::TypePatch(32u, dex_file1, 1000u), - LinkerPatch::TypePatch(32u, dex_file1, 1001u), - LinkerPatch::TypePatch(32u, dex_file2, 1000u), - LinkerPatch::TypePatch(32u, dex_file2, 1001u), + LinkerPatch::RelativeTypePatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeTypePatch(32u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeTypePatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeTypePatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::RelativeTypePatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeTypePatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeTypePatch(32u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeTypePatch(32u, dex_file2, 3001u, 1001u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3001u, 1000u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3000u, 1001u), + LinkerPatch::TypeBssEntryPatch(32u, dex_file2, 3001u, 1001u), + LinkerPatch::RelativeStringPatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeStringPatch(32u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeStringPatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeStringPatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::RelativeStringPatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeStringPatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeStringPatch(32u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeStringPatch(32u, dex_file2, 3001u, 1001u), + LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3001u, 1000u), + 
LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::StringBssEntryPatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1001u), + LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1001u), LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u), LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u), LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u), @@ -98,7 +167,12 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u), LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u), LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u), - LinkerPatch::MethodPatch(16u, dex_file2, 1001u), // identical with patch as index 3. + LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 0u), + LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 1u), + LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u), + LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u), + + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Same as patch at index 3. }; constexpr size_t last_index = arraysize(patches) - 1u; diff --git a/compiler/compiler.h b/compiler/compiler.h index 908d3669ed..cd4c59101e 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -25,11 +25,11 @@ namespace art { namespace jit { class JitCodeCache; -} +} // namespace jit namespace mirror { class ClassLoader; class DexCache; -} +} // namespace mirror class ArtMethod; class CompilerDriver; diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index 558c7d5754..de32351abf 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -411,7 +411,7 @@ class ElfCompilationUnitWriter { for (const auto& base_class_reference : base_class_references) { size_t reference_offset = base_class_reference.first; mirror::Class* base_class = base_class_reference.second; - const auto& it = class_declarations.find(base_class); + const auto it = class_declarations.find(base_class); if (it != class_declarations.end()) { info_.UpdateUint32(reference_offset, it->second); } else { @@ -512,7 +512,7 @@ class ElfCompilationUnitWriter { using namespace dwarf; // NOLINT. For easy access to DWARF constants. DCHECK(!desc.empty()); - const auto& it = type_cache_.find(desc); + const auto it = type_cache_.find(desc); if (it != type_cache_.end()) { return it->second; } diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index cbfdbddd1d..bf47e8f3d9 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -85,7 +85,7 @@ struct VariableLocation { // The result will cover all ranges where the variable is in scope. // PCs corresponding to stackmap with dex register map are accurate, // all other PCs are best-effort only. 
-std::vector<VariableLocation> GetVariableLocations( +static std::vector<VariableLocation> GetVariableLocations( const MethodDebugInfo* method_info, const std::vector<DexRegisterMap>& dex_register_maps, uint16_t vreg, diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index d1c10a9246..7fa6e146c5 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -30,6 +30,7 @@ #include "debug/method_debug_info.h" #include "elf_builder.h" #include "linker/vector_output_stream.h" +#include "oat.h" namespace art { namespace debug { diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h index 07f7229827..5d688108e7 100644 --- a/compiler/debug/elf_debug_writer.h +++ b/compiler/debug/elf_debug_writer.h @@ -29,7 +29,7 @@ namespace art { class OatHeader; namespace mirror { class Class; -} +} // namespace mirror namespace debug { struct MethodDebugInfo; diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 1573062033..2db99cda3e 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -28,7 +28,7 @@ #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "mirror/dex_cache.h" -#include "thread-inl.h" +#include "thread-current-inl.h" namespace art { namespace optimizer { diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc index e691a67dc0..257229101c 100644 --- a/compiler/dex/inline_method_analyser.cc +++ b/compiler/dex/inline_method_analyser.cc @@ -433,8 +433,11 @@ bool InlineMethodAnalyser::AnalyseMethodCode(ArtMethod* method, InlineMethod* re // Native or abstract. return false; } - return AnalyseMethodCode( - code_item, method->ToMethodReference(), method->IsStatic(), method, result); + return AnalyseMethodCode(code_item, + MethodReference(method->GetDexFile(), method->GetDexMethodIndex()), + method->IsStatic(), + method, + result); } bool InlineMethodAnalyser::AnalyseMethodCode(const DexFile::CodeItem* code_item, diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc index 3f0df3b2c8..b87cb61ed6 100644 --- a/compiler/dex/verification_results.cc +++ b/compiler/dex/verification_results.cc @@ -17,12 +17,13 @@ #include "verification_results.h" #include "base/logging.h" -#include "base/stl_util.h" #include "base/mutex-inl.h" +#include "base/stl_util.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" +#include "runtime.h" #include "thread.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/atomic_method_ref_map-inl.h" #include "verified_method.h" #include "verifier/method_verifier-inl.h" @@ -82,7 +83,12 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method // TODO: Investigate why are we doing the work again for this method and try to avoid it. LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod(); if (!Runtime::Current()->UseJitCompilation()) { - DCHECK_EQ(existing->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size()); + if (kIsDebugBuild) { + auto ex_set = existing->GetSafeCastSet(); + auto ve_set = verified_method->GetSafeCastSet(); + CHECK_EQ(ex_set == nullptr, ve_set == nullptr); + CHECK((ex_set == nullptr) || (ex_set->size() == ve_set->size())); + } } // Let the unique_ptr delete the new verified method since there was already an existing one // registered. 
It is unsafe to replace the existing one since the JIT may be using it to diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc index 608a18aa66..e46dc597fa 100644 --- a/compiler/dex/verified_method.cc +++ b/compiler/dex/verified_method.cc @@ -49,7 +49,10 @@ const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_ve } bool VerifiedMethod::IsSafeCast(uint32_t pc) const { - return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc); + if (safe_cast_set_ == nullptr) { + return false; + } + return std::binary_search(safe_cast_set_->begin(), safe_cast_set_->end(), pc); } void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) { @@ -94,12 +97,16 @@ void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifi /* strict */ true, /* assignable */ true); } + if (safe_cast_set_ == nullptr) { + safe_cast_set_.reset(new SafeCastSet()); + } // Verify ordering for push_back() to the sorted vector. - DCHECK(safe_cast_set_.empty() || safe_cast_set_.back() < dex_pc); - safe_cast_set_.push_back(dex_pc); + DCHECK(safe_cast_set_->empty() || safe_cast_set_->back() < dex_pc); + safe_cast_set_->push_back(dex_pc); } } } + DCHECK(safe_cast_set_ == nullptr || !safe_cast_set_->empty()); } } // namespace art diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h index 439e69ece9..64b3f448e6 100644 --- a/compiler/dex/verified_method.h +++ b/compiler/dex/verified_method.h @@ -43,8 +43,8 @@ class VerifiedMethod { REQUIRES_SHARED(Locks::mutator_lock_); ~VerifiedMethod() = default; - const SafeCastSet& GetSafeCastSet() const { - return safe_cast_set_; + const SafeCastSet* GetSafeCastSet() const { + return safe_cast_set_.get(); } // Returns true if the cast can statically be verified to be redundant @@ -69,7 +69,7 @@ class VerifiedMethod { void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) REQUIRES_SHARED(Locks::mutator_lock_); - SafeCastSet safe_cast_set_; + std::unique_ptr<SafeCastSet> safe_cast_set_; const uint32_t encountered_error_types_; const bool has_runtime_throw_; diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index e6a47ba60f..528b0a215b 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -21,7 +21,7 @@ #include "base/logging.h" #include "compiled_method.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils.h" #include "utils/dedupe_set-inl.h" #include "utils/swap_space.h" diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 6572d170e6..bbd28b2576 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -71,11 +71,11 @@ TEST(CompiledMethodStorage, Deduplicate) { }; const LinkerPatch raw_patches1[] = { LinkerPatch::CodePatch(0u, nullptr, 1u), - LinkerPatch::MethodPatch(4u, nullptr, 1u), + LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u), }; const LinkerPatch raw_patches2[] = { LinkerPatch::CodePatch(0u, nullptr, 1u), - LinkerPatch::MethodPatch(4u, nullptr, 2u), + LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u), }; ArrayRef<const LinkerPatch> patches[] = { ArrayRef<const LinkerPatch>(raw_patches1), diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index 582330611d..8cc1cc38e2 100644 --- a/compiler/driver/compiler_driver-inl.h +++ 
b/compiler/driver/compiler_driver-inl.h @@ -24,10 +24,11 @@ #include "base/enums.h" #include "class_linker-inl.h" #include "dex_compilation_unit.h" +#include "handle_scope-inl.h" #include "mirror/class_loader.h" #include "mirror/dex_cache-inl.h" +#include "runtime.h" #include "scoped_thread_state_change-inl.h" -#include "handle_scope-inl.h" namespace art { @@ -149,6 +150,11 @@ inline ArtMethod* CompilerDriver::ResolveMethod( return resolved_method; } +inline VerificationResults* CompilerDriver::GetVerificationResults() const { + DCHECK(Runtime::Current()->IsAotCompiler()); + return verification_results_; +} + } // namespace art #endif // ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index a8ab7c6091..93f678c64a 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -28,6 +28,7 @@ #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/arena_allocator.h" #include "base/array_ref.h" #include "base/bit_vector.h" #include "base/enums.h" @@ -36,7 +37,6 @@ #include "base/time_utils.h" #include "base/timing_logger.h" #include "class_linker-inl.h" -#include "compiled_class.h" #include "compiled_method.h" #include "compiler.h" #include "compiler_callbacks.h" @@ -317,11 +317,6 @@ CompilerDriver::CompilerDriver( } CompilerDriver::~CompilerDriver() { - Thread* self = Thread::Current(); - { - MutexLock mu(self, compiled_classes_lock_); - STLDeleteValues(&compiled_classes_); - } compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED, CompiledMethod* method) { if (method != nullptr) { @@ -1005,7 +1000,8 @@ bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_r if (profile_compilation_info_ == nullptr) { return false; } - bool result = profile_compilation_info_->ContainsMethod(method_ref); + // TODO: Revisit compiling all startup methods. b/36457259 + bool result = profile_compilation_info_->IsStartupOrHotMethod(method_ref); if (kDebugProfileGuidedCompilation) { LOG(INFO) << "[ProfileGuidedCompilation] " @@ -1978,8 +1974,7 @@ bool CompilerDriver::FastVerify(jobject jclass_loader, if (compiler_only_verifies) { // Just update the compiled_classes_ map. The compiler doesn't need to resolve // the type. - compiled_classes_.Overwrite( - ClassReference(dex_file, i), new CompiledClass(mirror::Class::kStatusVerified)); + compiled_classes_.Overwrite(ClassReference(dex_file, i), mirror::Class::kStatusVerified); } else { // Update the class status, so later compilation stages know they don't need to verify // the class. @@ -2245,7 +2240,7 @@ class InitializeClassVisitor : public CompilationVisitor { public: explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} - void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + void Visit(size_t class_def_index) OVERRIDE { ATRACE_CALL(); jobject jclass_loader = manager_->GetClassLoader(); const DexFile& dex_file = *manager_->GetDexFile(); @@ -2260,89 +2255,132 @@ class InitializeClassVisitor : public CompilationVisitor { Handle<mirror::Class> klass( hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader))); - if (klass != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) { - // Only try to initialize classes that were successfully verified. - if (klass->IsVerified()) { - // Attempt to initialize the class but bail if we either need to initialize the super-class - // or static fields. 
- manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false); - if (!klass->IsInitialized()) { - // We don't want non-trivial class initialization occurring on multiple threads due to - // deadlock problems. For example, a parent class is initialized (holding its lock) that - // refers to a sub-class in its static/class initializer causing it to try to acquire the - // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock) - // after first initializing its parents, whose locks are acquired. This leads to a - // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock. - // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather - // than use a special Object for the purpose we use the Class of java.lang.Class. - Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass())); - ObjectLock<mirror::Class> lock(soa.Self(), h_klass); - // Attempt to initialize allowing initialization of parent classes but still not static - // fields. + if (klass != nullptr && !SkipClass(manager_->GetClassLoader(), dex_file, klass.Get())) { + TryInitializeClass(klass, class_loader); + } + // Clear any class not found or verification exceptions. + soa.Self()->ClearException(); + } + + // A helper function for initializing klass. + void TryInitializeClass(Handle<mirror::Class> klass, Handle<mirror::ClassLoader>& class_loader) + REQUIRES_SHARED(Locks::mutator_lock_) { + const DexFile& dex_file = klass->GetDexFile(); + const DexFile::ClassDef* class_def = klass->GetClassDef(); + const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def->class_idx_); + const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_); + ScopedObjectAccessUnchecked soa(Thread::Current()); + StackHandleScope<3> hs(soa.Self()); + + mirror::Class::Status old_status = klass->GetStatus();; + // Only try to initialize classes that were successfully verified. + if (klass->IsVerified()) { + // Attempt to initialize the class but bail if we either need to initialize the super-class + // or static fields. + manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false); + old_status = klass->GetStatus(); + if (!klass->IsInitialized()) { + // We don't want non-trivial class initialization occurring on multiple threads due to + // deadlock problems. For example, a parent class is initialized (holding its lock) that + // refers to a sub-class in its static/class initializer causing it to try to acquire the + // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock) + // after first initializing its parents, whose locks are acquired. This leads to a + // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock. + // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather + // than use a special Object for the purpose we use the Class of java.lang.Class. + Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass())); + ObjectLock<mirror::Class> lock(soa.Self(), h_klass); + // Attempt to initialize allowing initialization of parent classes but still not static + // fields. + bool is_superclass_initialized = true; + if (!manager_->GetCompiler()->GetCompilerOptions().IsAppImage()) { + // If not an app image case, the compiler won't initialize too much things and do a fast + // fail, don't check dependencies. 
manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); - if (!klass->IsInitialized()) { + } else { + // For app images, do the initialization recursively and resolve types encountered to make + // sure the compiler runs without error. + is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self()); + if (is_superclass_initialized) { + manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); + } + } + old_status = klass->GetStatus(); + // If superclass cannot be initialized, no need to proceed. + if (!klass->IsInitialized() && + is_superclass_initialized && + manager_->GetCompiler()->IsImageClass(descriptor)) { + bool can_init_static_fields = false; + if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) { // We need to initialize static fields, we only do this for image classes that aren't // marked with the $NoPreloadHolder (which implies this should not be initialized early). - bool can_init_static_fields = - manager_->GetCompiler()->GetCompilerOptions().IsBootImage() && - manager_->GetCompiler()->IsImageClass(descriptor) && - !StringPiece(descriptor).ends_with("$NoPreloadHolder;"); - if (can_init_static_fields) { - VLOG(compiler) << "Initializing: " << descriptor; - // TODO multithreading support. We should ensure the current compilation thread has - // exclusive access to the runtime and the transaction. To achieve this, we could use - // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity - // checks in Thread::AssertThreadSuspensionIsAllowable. - Runtime* const runtime = Runtime::Current(); - Transaction transaction; - - // Run the class initializer in transaction mode. - runtime->EnterTransactionMode(&transaction); - const mirror::Class::Status old_status = klass->GetStatus(); - bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true, - true); - // TODO we detach transaction from runtime to indicate we quit the transactional - // mode which prevents the GC from visiting objects modified during the transaction. - // Ensure GC is not run so don't access freed objects when aborting transaction. - - { - ScopedAssertNoThreadSuspension ants("Transaction end"); - runtime->ExitTransactionMode(); - - if (!success) { - CHECK(soa.Self()->IsExceptionPending()); - mirror::Throwable* exception = soa.Self()->GetException(); - VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " - << exception->Dump(); - std::ostream* file_log = manager_->GetCompiler()-> - GetCompilerOptions().GetInitFailureOutput(); - if (file_log != nullptr) { - *file_log << descriptor << "\n"; - *file_log << exception->Dump() << "\n"; - } - soa.Self()->ClearException(); - transaction.Rollback(); - CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; + can_init_static_fields = !StringPiece(descriptor).ends_with("$NoPreloadHolder;"); + } else { + can_init_static_fields = manager_->GetCompiler()->GetCompilerOptions().IsAppImage() && + !soa.Self()->IsExceptionPending() && + NoClinitInDependency(klass, soa.Self(), &class_loader); + // TODO The checking for clinit can be removed since it's already + // checked when init superclass. Currently keep it because it contains + // processing of intern strings. Will be removed later when intern strings + // and clinit are both initialized. + } + + if (can_init_static_fields) { + VLOG(compiler) << "Initializing: " << descriptor; + // TODO multithreading support. 
We should ensure the current compilation thread has + // exclusive access to the runtime and the transaction. To achieve this, we could use + // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity + // checks in Thread::AssertThreadSuspensionIsAllowable. + Runtime* const runtime = Runtime::Current(); + Transaction transaction; + + // Run the class initializer in transaction mode. + runtime->EnterTransactionMode(&transaction); + bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true, + true); + // TODO we detach transaction from runtime to indicate we quit the transactional + // mode which prevents the GC from visiting objects modified during the transaction. + // Ensure GC is not run so don't access freed objects when aborting transaction. + + { + ScopedAssertNoThreadSuspension ants("Transaction end"); + runtime->ExitTransactionMode(); + + if (!success) { + CHECK(soa.Self()->IsExceptionPending()); + mirror::Throwable* exception = soa.Self()->GetException(); + VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " + << exception->Dump(); + std::ostream* file_log = manager_->GetCompiler()-> + GetCompilerOptions().GetInitFailureOutput(); + if (file_log != nullptr) { + *file_log << descriptor << "\n"; + *file_log << exception->Dump() << "\n"; } + soa.Self()->ClearException(); + transaction.Rollback(); + CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; } + } - if (!success) { - // On failure, still intern strings of static fields and seen in <clinit>, as these - // will be created in the zygote. This is separated from the transaction code just - // above as we will allocate strings, so must be allowed to suspend. + if (!success) { + // On failure, still intern strings of static fields and seen in <clinit>, as these + // will be created in the zygote. This is separated from the transaction code just + // above as we will allocate strings, so must be allowed to suspend. + if (&klass->GetDexFile() == manager_->GetDexFile()) { InternStrings(klass, class_loader); } } } - soa.Self()->AssertNoPendingException(); } + soa.Self()->AssertNoPendingException(); } - // Record the final class status if necessary. - ClassReference ref(manager_->GetDexFile(), class_def_index); - manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); } - // Clear any class not found or verification exceptions. - soa.Self()->ClearException(); + // Record the final class status if necessary. + ClassReference ref(&dex_file, klass->GetDexClassDefIndex()); + // Back up the status before doing initialization for static encoded fields, + // because the static encoded branch wants to keep the status to uninitialized. 
+ manager_->GetCompiler()->RecordClassStatus(ref, old_status); } private: @@ -2397,6 +2435,136 @@ class InitializeClassVisitor : public CompilationVisitor { } } + bool ResolveTypesOfMethods(Thread* self, ArtMethod* m) + REQUIRES_SHARED(Locks::mutator_lock_) { + auto rtn_type = m->GetReturnType(true); + if (rtn_type == nullptr) { + self->ClearException(); + return false; + } + const DexFile::TypeList* types = m->GetParameterTypeList(); + if (types != nullptr) { + for (uint32_t i = 0; i < types->Size(); ++i) { + dex::TypeIndex param_type_idx = types->GetTypeItem(i).type_idx_; + auto param_type = m->GetClassFromTypeIndex(param_type_idx, true); + if (param_type == nullptr) { + self->ClearException(); + return false; + } + } + } + return true; + } + + // Pre resolve types mentioned in all method signatures before start a transaction + // since ResolveType doesn't work in transaction mode. + bool PreResolveTypes(Thread* self, const Handle<mirror::Class>& klass) + REQUIRES_SHARED(Locks::mutator_lock_) { + PointerSize pointer_size = manager_->GetClassLinker()->GetImagePointerSize(); + for (ArtMethod& m : klass->GetMethods(pointer_size)) { + if (!ResolveTypesOfMethods(self, &m)) { + return false; + } + } + if (klass->IsInterface()) { + return true; + } else if (klass->HasSuperClass()) { + StackHandleScope<1> hs(self); + MutableHandle<mirror::Class> super_klass(hs.NewHandle<mirror::Class>(klass->GetSuperClass())); + for (int i = super_klass->GetVTableLength() - 1; i >= 0; --i) { + ArtMethod* m = klass->GetVTableEntry(i, pointer_size); + ArtMethod* super_m = super_klass->GetVTableEntry(i, pointer_size); + if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) { + return false; + } + } + for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) { + super_klass.Assign(klass->GetIfTable()->GetInterface(i)); + if (klass->GetClassLoader() != super_klass->GetClassLoader()) { + uint32_t num_methods = super_klass->NumVirtualMethods(); + for (uint32_t j = 0; j < num_methods; ++j) { + ArtMethod* m = klass->GetIfTable()->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>( + j, pointer_size); + ArtMethod* super_m = super_klass->GetVirtualMethod(j, pointer_size); + if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) { + return false; + } + } + } + } + } + return true; + } + + // Initialize the klass's dependencies recursively before initializing itself. + // Checking for interfaces is also necessary since interfaces can contain + // both default methods and static encoded fields. 
+ bool InitializeDependencies(const Handle<mirror::Class>& klass, + Handle<mirror::ClassLoader> class_loader, + Thread* self) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (klass->HasSuperClass()) { + ObjPtr<mirror::Class> super_class = klass->GetSuperClass(); + StackHandleScope<1> hs(self); + Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class)); + if (!handle_scope_super->IsInitialized()) { + this->TryInitializeClass(handle_scope_super, class_loader); + if (!handle_scope_super->IsInitialized()) { + return false; + } + } + } + + uint32_t num_if = klass->NumDirectInterfaces(); + for (size_t i = 0; i < num_if; i++) { + ObjPtr<mirror::Class> + interface = mirror::Class::GetDirectInterface(self, klass.Get(), i); + StackHandleScope<1> hs(self); + Handle<mirror::Class> handle_interface(hs.NewHandle(interface)); + + TryInitializeClass(handle_interface, class_loader); + + if (!handle_interface->IsInitialized()) { + return false; + } + } + + return PreResolveTypes(self, klass); + } + + // In this phase the classes containing class initializers are ignored. Make sure no + // clinit appears in kalss's super class chain and interfaces. + bool NoClinitInDependency(const Handle<mirror::Class>& klass, + Thread* self, + Handle<mirror::ClassLoader>* class_loader) + REQUIRES_SHARED(Locks::mutator_lock_) { + ArtMethod* clinit = + klass->FindClassInitializer(manager_->GetClassLinker()->GetImagePointerSize()); + if (clinit != nullptr) { + VLOG(compiler) << klass->PrettyClass() << ' ' << clinit->PrettyMethod(true); + return false; + } + if (klass->HasSuperClass()) { + ObjPtr<mirror::Class> super_class = klass->GetSuperClass(); + StackHandleScope<1> hs(self); + Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class)); + if (!NoClinitInDependency(handle_scope_super, self, class_loader)) + return false; + } + + uint32_t num_if = klass->NumDirectInterfaces(); + for (size_t i = 0; i < num_if; i++) { + ObjPtr<mirror::Class> + interface = mirror::Class::GetDirectInterface(self, klass.Get(), i); + StackHandleScope<1> hs(self); + Handle<mirror::Class> handle_interface(hs.NewHandle(interface)); + if (!NoClinitInDependency(handle_interface, self, class_loader)) + return false; + } + + return true; + } + const ParallelCompilationManager* const manager_; }; @@ -2416,7 +2584,10 @@ void CompilerDriver::InitializeClasses(jobject jni_class_loader, ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files, init_thread_pool); - if (GetCompilerOptions().IsBootImage()) { + + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) { + // Set the concurrency thread to 1 to support initialization for App Images since transaction + // doesn't support multithreading now. // TODO: remove this when transactional mode supports multithreading. 
init_thread_count = 1U; } @@ -2690,14 +2861,15 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index); } -CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const { +bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const { + DCHECK(status != nullptr); MutexLock mu(Thread::Current(), compiled_classes_lock_); - ClassTable::const_iterator it = compiled_classes_.find(ref); + ClassStateTable::const_iterator it = compiled_classes_.find(ref); if (it == compiled_classes_.end()) { - return nullptr; + return false; } - CHECK(it->second != nullptr); - return it->second; + *status = it->second; + return true; } void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status status) { @@ -2719,12 +2891,11 @@ void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status MutexLock mu(Thread::Current(), compiled_classes_lock_); auto it = compiled_classes_.find(ref); if (it == compiled_classes_.end()) { - CompiledClass* compiled_class = new CompiledClass(status); - compiled_classes_.Overwrite(ref, compiled_class); - } else if (status > it->second->GetStatus()) { + compiled_classes_.Overwrite(ref, status); + } else if (status > it->second) { // Update the status if we now have a greater one. This happens with vdex, // which records a class is verified, but does not resolve it. - it->second->SetStatus(status); + it->second = status; } } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 874e35716c..38e7d2c686 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -23,7 +23,6 @@ #include <vector> #include "arch/instruction_set.h" -#include "base/arena_allocator.h" #include "base/array_ref.h" #include "base/bit_utils.h" #include "base/mutex.h" @@ -38,7 +37,6 @@ #include "method_reference.h" #include "mirror/class.h" // For mirror::Class::Status. 
#include "os.h" -#include "runtime.h" #include "safe_map.h" #include "thread_pool.h" #include "utils/atomic_method_ref_map.h" @@ -56,12 +54,12 @@ class VerifierDepsTest; } // namespace verifier class BitVector; -class CompiledClass; class CompiledMethod; class CompilerOptions; class DexCompilationUnit; struct InlineIGetIPutData; class InstructionSetFeatures; +class InternTable; class ParallelCompilationManager; class ScopedObjectAccess; template <class Allocator> class SrcMap; @@ -132,10 +130,7 @@ class CompilerDriver { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!compiled_classes_lock_, !dex_to_dex_references_lock_); - VerificationResults* GetVerificationResults() const { - DCHECK(Runtime::Current()->IsAotCompiler()); - return verification_results_; - } + VerificationResults* GetVerificationResults() const; InstructionSet GetInstructionSet() const { return instruction_set_; @@ -164,7 +159,7 @@ class CompilerDriver { std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const; std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const; - CompiledClass* GetCompiledClass(ClassReference ref) const + bool GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const REQUIRES(!compiled_classes_lock_); CompiledMethod* GetCompiledMethod(MethodReference ref) const; @@ -179,6 +174,40 @@ class CompilerDriver { uint16_t class_def_index, bool requires) REQUIRES(!requires_constructor_barrier_lock_); + + // Do the <init> methods for this class require a constructor barrier (prior to the return)? + // The answer is "yes", if and only if this class has any instance final fields. + // (This must not be called for any non-<init> methods; the answer would be "no"). + // + // --- + // + // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end + // of the invoked constructor. The constructor barrier is a conservative implementation means of + // enforcing the freezes happen-before the object being constructed is observable by another + // thread. + // + // Note: This question only makes sense for instance constructors; + // static constructors (despite possibly having finals) never need + // a barrier. + // + // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes + // class initialization as: + // + // lock(class.lock) + // class.state = initializing + // unlock(class.lock) + // + // invoke <clinit> + // + // lock(class.lock) + // class.state = initialized + // unlock(class.lock) <-- acts as a release + // + // The last operation in the above example acts as an atomic release + // for any stores in <clinit>, which ends up being stricter + // than what a constructor barrier needs. + // + // See also QuasiAtomic::ThreadFenceForConstructor(). bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file, uint16_t class_def_index) @@ -471,10 +500,10 @@ class CompilerDriver { std::map<ClassReference, bool> requires_constructor_barrier_ GUARDED_BY(requires_constructor_barrier_lock_); - typedef SafeMap<const ClassReference, CompiledClass*> ClassTable; + using ClassStateTable = SafeMap<const ClassReference, mirror::Class::Status>; // All class references that this compiler has compiled. 
mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_); + ClassStateTable compiled_classes_ GUARDED_BY(compiled_classes_lock_); typedef AtomicMethodRefMap<CompiledMethod*> MethodTable; diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 17854fd61a..4b979d8125 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -23,7 +23,6 @@ #include "art_method-inl.h" #include "class_linker-inl.h" #include "common_compiler_test.h" -#include "compiled_class.h" #include "dex_file.h" #include "dex_file_types.h" #include "gc/heap.h" @@ -240,8 +239,14 @@ class CompilerDriverProfileTest : public CompilerDriverTest { ProfileCompilationInfo info; for (const std::unique_ptr<const DexFile>& dex_file : dex_files) { - profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1); - profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2); + profile_info_.AddMethodIndex(dex_file->GetLocation(), + dex_file->GetLocationChecksum(), + 1, + dex_file->NumMethodIds()); + profile_info_.AddMethodIndex(dex_file->GetLocation(), + dex_file->GetLocationChecksum(), + 2, + dex_file->NumMethodIds()); } return &profile_info_; } @@ -339,10 +344,11 @@ class CompilerDriverVerifyTest : public CompilerDriverTest { ASSERT_NE(klass, nullptr); EXPECT_TRUE(klass->IsVerified()); - CompiledClass* compiled_class = compiler_driver_->GetCompiledClass( - ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_)); - ASSERT_NE(compiled_class, nullptr); - EXPECT_EQ(compiled_class->GetStatus(), mirror::Class::kStatusVerified); + mirror::Class::Status status; + bool found = compiler_driver_->GetCompiledClass( + ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_), &status); + ASSERT_TRUE(found); + EXPECT_EQ(status, mirror::Class::kStatusVerified); } }; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index a0c0a2acf6..a4e2083fe4 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -200,7 +200,7 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa ParseDumpInitFailures(option, Usage); } else if (option.starts_with("--dump-cfg=")) { dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data(); - } else if (option.starts_with("--dump-cfg-append")) { + } else if (option == "--dump-cfg-append") { dump_cfg_append_ = true; } else if (option.starts_with("--register-allocation-strategy=")) { ParseRegisterAllocationStrategy(option, Usage); diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 957ea99c49..89c2537476 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -31,7 +31,7 @@ namespace art { namespace verifier { class VerifierDepsTest; -} +} // namespace verifier class DexFile; @@ -237,6 +237,10 @@ class CompilerOptions FINAL { bool ParseCompilerOption(const StringPiece& option, UsageFn Usage); + void SetNonPic() { + compile_pic_ = false; + } + const std::string& GetDumpCfgFileName() const { return dump_cfg_file_name_; } diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 28c35e96b4..738f5a2b29 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -34,7 +34,7 @@ #include "leb128.h" #include "linker/buffered_output_stream.h" #include 
"linker/file_output_stream.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "thread_pool.h" #include "utils.h" diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 7e53d8d2ab..9d7aff769b 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -14,492 +14,17 @@ * limitations under the License. */ -#include "image.h" - -#include <memory> -#include <string> +#include <string.h> #include <vector> -#include "android-base/stringprintf.h" +#include "image_test.h" -#include "art_method-inl.h" -#include "base/unix_file/fd_file.h" -#include "class_linker-inl.h" -#include "compiler_callbacks.h" -#include "common_compiler_test.h" -#include "debug/method_debug_info.h" -#include "dex/quick_compiler_callbacks.h" -#include "driver/compiler_options.h" -#include "elf_writer.h" -#include "elf_writer_quick.h" -#include "gc/space/image_space.h" -#include "image_writer.h" -#include "linker/buffered_output_stream.h" -#include "linker/file_output_stream.h" -#include "linker/multi_oat_relative_patcher.h" -#include "lock_word.h" -#include "mirror/object-inl.h" -#include "oat_writer.h" +#include "image.h" #include "scoped_thread_state_change-inl.h" -#include "signal_catcher.h" -#include "utils.h" +#include "thread.h" namespace art { -static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS; - -struct CompilationHelper { - std::vector<std::string> dex_file_locations; - std::vector<ScratchFile> image_locations; - std::vector<std::unique_ptr<const DexFile>> extra_dex_files; - std::vector<ScratchFile> image_files; - std::vector<ScratchFile> oat_files; - std::vector<ScratchFile> vdex_files; - std::string image_dir; - - void Compile(CompilerDriver* driver, - ImageHeader::StorageMode storage_mode); - - std::vector<size_t> GetImageObjectSectionSizes(); - - ~CompilationHelper(); -}; - -class ImageTest : public CommonCompilerTest { - protected: - virtual void SetUp() { - ReserveImageSpace(); - CommonCompilerTest::SetUp(); - } - - void TestWriteRead(ImageHeader::StorageMode storage_mode); - - void Compile(ImageHeader::StorageMode storage_mode, - CompilationHelper& out_helper, - const std::string& extra_dex = "", - const std::initializer_list<std::string>& image_classes = {}); - - void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE { - CommonCompilerTest::SetUpRuntimeOptions(options); - callbacks_.reset(new QuickCompilerCallbacks( - verification_results_.get(), - CompilerCallbacks::CallbackMode::kCompileBootImage)); - options->push_back(std::make_pair("compilercallbacks", callbacks_.get())); - } - - std::unordered_set<std::string>* GetImageClasses() OVERRIDE { - return new std::unordered_set<std::string>(image_classes_); - } - - ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass) - REQUIRES_SHARED(Locks::mutator_lock_) { - PointerSize pointer_size = class_linker_->GetImagePointerSize(); - for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) { - if (strcmp(origin->GetName(), m.GetName()) == 0 && - origin->GetSignature() == m.GetSignature()) { - return &m; - } - } - return nullptr; - } - - private: - std::unordered_set<std::string> image_classes_; -}; - -CompilationHelper::~CompilationHelper() { - for (ScratchFile& image_file : image_files) { - image_file.Unlink(); - } - for (ScratchFile& oat_file : oat_files) { - oat_file.Unlink(); - } - for (ScratchFile& vdex_file : vdex_files) { - vdex_file.Unlink(); - } - const int rmdir_result = rmdir(image_dir.c_str()); - CHECK_EQ(0, rmdir_result); -} - -std::vector<size_t> 
CompilationHelper::GetImageObjectSectionSizes() { - std::vector<size_t> ret; - for (ScratchFile& image_file : image_files) { - std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str())); - CHECK(file.get() != nullptr); - ImageHeader image_header; - CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true); - CHECK(image_header.IsValid()); - ret.push_back(image_header.GetImageSize()); - } - return ret; -} - -void CompilationHelper::Compile(CompilerDriver* driver, - ImageHeader::StorageMode storage_mode) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - std::vector<const DexFile*> class_path = class_linker->GetBootClassPath(); - - for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) { - { - ScopedObjectAccess soa(Thread::Current()); - // Inject in boot class path so that the compiler driver can see it. - class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get()); - } - class_path.push_back(dex_file.get()); - } - - // Enable write for dex2dex. - for (const DexFile* dex_file : class_path) { - dex_file_locations.push_back(dex_file->GetLocation()); - if (dex_file->IsReadOnly()) { - dex_file->EnableWrite(); - } - } - { - // Create a generic tmp file, to be the base of the .art and .oat temporary files. - ScratchFile location; - for (int i = 0; i < static_cast<int>(class_path.size()); ++i) { - std::string cur_location = - android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i); - image_locations.push_back(ScratchFile(cur_location)); - } - } - std::vector<std::string> image_filenames; - for (ScratchFile& file : image_locations) { - std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA)); - image_filenames.push_back(image_filename); - size_t pos = image_filename.rfind('/'); - CHECK_NE(pos, std::string::npos) << image_filename; - if (image_dir.empty()) { - image_dir = image_filename.substr(0, pos); - int mkdir_result = mkdir(image_dir.c_str(), 0700); - CHECK_EQ(0, mkdir_result) << image_dir; - } - image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str()))); - } - - std::vector<std::string> oat_filenames; - std::vector<std::string> vdex_filenames; - for (const std::string& image_filename : image_filenames) { - std::string oat_filename = ReplaceFileExtension(image_filename, "oat"); - oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str()))); - oat_filenames.push_back(oat_filename); - std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex"); - vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str()))); - vdex_filenames.push_back(vdex_filename); - } - - std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map; - std::vector<const char*> oat_filename_vector; - for (const std::string& file : oat_filenames) { - oat_filename_vector.push_back(file.c_str()); - } - std::vector<const char*> image_filename_vector; - for (const std::string& file : image_filenames) { - image_filename_vector.push_back(file.c_str()); - } - size_t image_idx = 0; - for (const DexFile* dex_file : class_path) { - dex_file_to_oat_index_map.emplace(dex_file, image_idx); - ++image_idx; - } - // TODO: compile_pic should be a test argument. 
- std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver, - kRequestedImageBase, - /*compile_pic*/false, - /*compile_app_image*/false, - storage_mode, - oat_filename_vector, - dex_file_to_oat_index_map)); - { - { - jobject class_loader = nullptr; - TimingLogger timings("ImageTest::WriteRead", false, false); - TimingLogger::ScopedTiming t("CompileAll", &timings); - driver->SetDexFilesForOatFile(class_path); - driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings); - - t.NewTiming("WriteElf"); - SafeMap<std::string, std::string> key_value_store; - std::vector<const char*> dex_filename_vector; - for (size_t i = 0; i < class_path.size(); ++i) { - dex_filename_vector.push_back(""); - } - key_value_store.Put(OatHeader::kBootClassPathKey, - gc::space::ImageSpace::GetMultiImageBootClassPath( - dex_filename_vector, - oat_filename_vector, - image_filename_vector)); - - std::vector<std::unique_ptr<ElfWriter>> elf_writers; - std::vector<std::unique_ptr<OatWriter>> oat_writers; - for (ScratchFile& oat_file : oat_files) { - elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(), - driver->GetInstructionSetFeatures(), - &driver->GetCompilerOptions(), - oat_file.GetFile())); - elf_writers.back()->Start(); - oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true, - &timings, - /*profile_compilation_info*/nullptr)); - } - - std::vector<OutputStream*> rodata; - std::vector<std::unique_ptr<MemMap>> opened_dex_files_map; - std::vector<std::unique_ptr<const DexFile>> opened_dex_files; - // Now that we have finalized key_value_store_, start writing the oat file. - for (size_t i = 0, size = oat_writers.size(); i != size; ++i) { - const DexFile* dex_file = class_path[i]; - rodata.push_back(elf_writers[i]->StartRoData()); - ArrayRef<const uint8_t> raw_dex_file( - reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()), - dex_file->GetHeader().file_size_); - oat_writers[i]->AddRawDexFileSource(raw_dex_file, - dex_file->GetLocation().c_str(), - dex_file->GetLocationChecksum()); - - std::unique_ptr<MemMap> cur_opened_dex_files_map; - std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files; - bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles( - kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(), - rodata.back(), - driver->GetInstructionSet(), - driver->GetInstructionSetFeatures(), - &key_value_store, - /* verify */ false, // Dex files may be dex-to-dex-ed, don't verify. 
- /* update_input_vdex */ false, - &cur_opened_dex_files_map, - &cur_opened_dex_files); - ASSERT_TRUE(dex_files_ok); - - if (cur_opened_dex_files_map != nullptr) { - opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map)); - for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) { - // dex_file_oat_index_map_.emplace(dex_file.get(), i); - opened_dex_files.push_back(std::move(cur_dex_file)); - } - } else { - ASSERT_TRUE(cur_opened_dex_files.empty()); - } - } - bool image_space_ok = writer->PrepareImageAddressSpace(); - ASSERT_TRUE(image_space_ok); - - if (kIsVdexEnabled) { - for (size_t i = 0, size = vdex_files.size(); i != size; ++i) { - std::unique_ptr<BufferedOutputStream> vdex_out( - MakeUnique<BufferedOutputStream>( - MakeUnique<FileOutputStream>(vdex_files[i].GetFile()))); - oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr); - oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get()); - } - } - - for (size_t i = 0, size = oat_files.size(); i != size; ++i) { - linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(), - driver->GetInstructionSetFeatures()); - OatWriter* const oat_writer = oat_writers[i].get(); - ElfWriter* const elf_writer = elf_writers[i].get(); - std::vector<const DexFile*> cur_dex_files(1u, class_path[i]); - oat_writer->Initialize(driver, writer.get(), cur_dex_files); - oat_writer->PrepareLayout(&patcher); - size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset(); - size_t text_size = oat_writer->GetOatSize() - rodata_size; - elf_writer->PrepareDynamicSection(rodata_size, - text_size, - oat_writer->GetBssSize(), - oat_writer->GetBssRootsOffset()); - - writer->UpdateOatFileLayout(i, - elf_writer->GetLoadedSize(), - oat_writer->GetOatDataOffset(), - oat_writer->GetOatSize()); - - bool rodata_ok = oat_writer->WriteRodata(rodata[i]); - ASSERT_TRUE(rodata_ok); - elf_writer->EndRoData(rodata[i]); - - OutputStream* text = elf_writer->StartText(); - bool text_ok = oat_writer->WriteCode(text); - ASSERT_TRUE(text_ok); - elf_writer->EndText(text); - - bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u); - ASSERT_TRUE(header_ok); - - writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader()); - - elf_writer->WriteDynamicSection(); - elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo()); - - bool success = elf_writer->End(); - ASSERT_TRUE(success); - } - } - - bool success_image = writer->Write(kInvalidFd, - image_filename_vector, - oat_filename_vector); - ASSERT_TRUE(success_image); - - for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) { - const char* oat_filename = oat_filenames[i].c_str(); - std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename)); - ASSERT_TRUE(oat_file != nullptr); - bool success_fixup = ElfWriter::Fixup(oat_file.get(), - writer->GetOatDataBegin(i)); - ASSERT_TRUE(success_fixup); - ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file " - << oat_filename; - } - } -} - -void ImageTest::Compile(ImageHeader::StorageMode storage_mode, - CompilationHelper& helper, - const std::string& extra_dex, - const std::initializer_list<std::string>& image_classes) { - for (const std::string& image_class : image_classes) { - image_classes_.insert(image_class); - } - CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); - // Set inline filter values. 
- compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits); - image_classes_.clear(); - if (!extra_dex.empty()) { - helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str()); - } - helper.Compile(compiler_driver_.get(), storage_mode); - if (image_classes.begin() != image_classes.end()) { - // Make sure the class got initialized. - ScopedObjectAccess soa(Thread::Current()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - for (const std::string& image_class : image_classes) { - mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); - EXPECT_TRUE(klass != nullptr); - EXPECT_TRUE(klass->IsInitialized()); - } - } -} - -void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { - CompilationHelper helper; - Compile(storage_mode, /*out*/ helper); - std::vector<uint64_t> image_file_sizes; - for (ScratchFile& image_file : helper.image_files) { - std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str())); - ASSERT_TRUE(file.get() != nullptr); - ImageHeader image_header; - ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true); - ASSERT_TRUE(image_header.IsValid()); - const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap); - ASSERT_GE(bitmap_section.Offset(), sizeof(image_header)); - ASSERT_NE(0U, bitmap_section.Size()); - - gc::Heap* heap = Runtime::Current()->GetHeap(); - ASSERT_TRUE(heap->HaveContinuousSpaces()); - gc::space::ContinuousSpace* space = heap->GetNonMovingSpace(); - ASSERT_FALSE(space->IsImageSpace()); - ASSERT_TRUE(space != nullptr); - ASSERT_TRUE(space->IsMallocSpace()); - image_file_sizes.push_back(file->GetLength()); - } - - ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr); - std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses()); - - // Need to delete the compiler since it has worker threads which are attached to runtime. - compiler_driver_.reset(); - - // Tear down old runtime before making a new one, clearing out misc state. - - // Remove the reservation of the memory for use to load the image. - // Need to do this before we reset the runtime. - UnreserveImageSpace(); - - helper.extra_dex_files.clear(); - runtime_.reset(); - java_lang_dex_file_ = nullptr; - - MemMap::Init(); - - RuntimeOptions options; - std::string image("-Ximage:"); - image.append(helper.image_locations[0].GetFilename()); - options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr))); - // By default the compiler this creates will not include patch information. - options.push_back(std::make_pair("-Xnorelocate", nullptr)); - - if (!Runtime::Create(options, false)) { - LOG(FATAL) << "Failed to create runtime"; - return; - } - runtime_.reset(Runtime::Current()); - // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start, - // give it away now and then switch to a more managable ScopedObjectAccess. - Thread::Current()->TransitionFromRunnableToSuspended(kNative); - ScopedObjectAccess soa(Thread::Current()); - ASSERT_TRUE(runtime_.get() != nullptr); - class_linker_ = runtime_->GetClassLinker(); - - gc::Heap* heap = Runtime::Current()->GetHeap(); - ASSERT_TRUE(heap->HasBootImageSpace()); - ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace()); - - // We loaded the runtime with an explicit image, so it must exist. 
- ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size()); - for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) { - std::unique_ptr<const DexFile> dex( - LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str())); - ASSERT_TRUE(dex != nullptr); - uint64_t image_file_size = image_file_sizes[i]; - gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i]; - ASSERT_TRUE(image_space != nullptr); - if (storage_mode == ImageHeader::kStorageModeUncompressed) { - // Uncompressed, image should be smaller than file. - ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size); - } else if (image_file_size > 16 * KB) { - // Compressed, file should be smaller than image. Not really valid for small images. - ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize()); - } - - image_space->VerifyImageAllocations(); - uint8_t* image_begin = image_space->Begin(); - uint8_t* image_end = image_space->End(); - if (i == 0) { - // This check is only valid for image 0. - CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin)); - } - for (size_t j = 0; j < dex->NumClassDefs(); ++j) { - const DexFile::ClassDef& class_def = dex->GetClassDef(j); - const char* descriptor = dex->GetClassDescriptor(class_def); - mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor); - EXPECT_TRUE(klass != nullptr) << descriptor; - if (image_classes.find(descriptor) == image_classes.end()) { - EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end || - reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor; - } else { - // Image classes should be located inside the image. - EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor; - EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor; - } - EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false))); - } - } -} - -TEST_F(ImageTest, WriteReadUncompressed) { - TestWriteRead(ImageHeader::kStorageModeUncompressed); -} - -TEST_F(ImageTest, WriteReadLZ4) { - TestWriteRead(ImageHeader::kStorageModeLZ4); -} - -TEST_F(ImageTest, WriteReadLZ4HC) { - TestWriteRead(ImageHeader::kStorageModeLZ4HC); -} - TEST_F(ImageTest, TestImageLayout) { std::vector<size_t> image_sizes; std::vector<size_t> image_sizes_extra; diff --git a/compiler/image_test.h b/compiler/image_test.h new file mode 100644 index 0000000000..2f15ff4815 --- /dev/null +++ b/compiler/image_test.h @@ -0,0 +1,497 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_IMAGE_TEST_H_ +#define ART_COMPILER_IMAGE_TEST_H_ + +#include "image.h" + +#include <memory> +#include <string> +#include <vector> + +#include "android-base/stringprintf.h" + +#include "art_method-inl.h" +#include "base/unix_file/fd_file.h" +#include "class_linker-inl.h" +#include "compiler_callbacks.h" +#include "common_compiler_test.h" +#include "debug/method_debug_info.h" +#include "dex/quick_compiler_callbacks.h" +#include "driver/compiler_options.h" +#include "elf_writer.h" +#include "elf_writer_quick.h" +#include "gc/space/image_space.h" +#include "image_writer.h" +#include "linker/buffered_output_stream.h" +#include "linker/file_output_stream.h" +#include "linker/multi_oat_relative_patcher.h" +#include "lock_word.h" +#include "mirror/object-inl.h" +#include "oat_writer.h" +#include "scoped_thread_state_change-inl.h" +#include "signal_catcher.h" +#include "utils.h" + +namespace art { + +static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS; + +struct CompilationHelper { + std::vector<std::string> dex_file_locations; + std::vector<ScratchFile> image_locations; + std::vector<std::unique_ptr<const DexFile>> extra_dex_files; + std::vector<ScratchFile> image_files; + std::vector<ScratchFile> oat_files; + std::vector<ScratchFile> vdex_files; + std::string image_dir; + + void Compile(CompilerDriver* driver, + ImageHeader::StorageMode storage_mode); + + std::vector<size_t> GetImageObjectSectionSizes(); + + ~CompilationHelper(); +}; + +class ImageTest : public CommonCompilerTest { + protected: + virtual void SetUp() { + ReserveImageSpace(); + CommonCompilerTest::SetUp(); + } + + void TestWriteRead(ImageHeader::StorageMode storage_mode); + + void Compile(ImageHeader::StorageMode storage_mode, + CompilationHelper& out_helper, + const std::string& extra_dex = "", + const std::initializer_list<std::string>& image_classes = {}); + + void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE { + CommonCompilerTest::SetUpRuntimeOptions(options); + callbacks_.reset(new QuickCompilerCallbacks( + verification_results_.get(), + CompilerCallbacks::CallbackMode::kCompileBootImage)); + options->push_back(std::make_pair("compilercallbacks", callbacks_.get())); + } + + std::unordered_set<std::string>* GetImageClasses() OVERRIDE { + return new std::unordered_set<std::string>(image_classes_); + } + + ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass) + REQUIRES_SHARED(Locks::mutator_lock_) { + PointerSize pointer_size = class_linker_->GetImagePointerSize(); + for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) { + if (strcmp(origin->GetName(), m.GetName()) == 0 && + origin->GetSignature() == m.GetSignature()) { + return &m; + } + } + return nullptr; + } + + private: + std::unordered_set<std::string> image_classes_; +}; + +inline CompilationHelper::~CompilationHelper() { + for (ScratchFile& image_file : image_files) { + image_file.Unlink(); + } + for (ScratchFile& oat_file : oat_files) { + oat_file.Unlink(); + } + for (ScratchFile& vdex_file : vdex_files) { + vdex_file.Unlink(); + } + const int rmdir_result = rmdir(image_dir.c_str()); + CHECK_EQ(0, rmdir_result); +} + +inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() { + std::vector<size_t> ret; + for (ScratchFile& image_file : image_files) { + std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str())); + CHECK(file.get() != nullptr); + ImageHeader image_header; + CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true); + 
CHECK(image_header.IsValid()); + ret.push_back(image_header.GetImageSize()); + } + return ret; +} + +inline void CompilationHelper::Compile(CompilerDriver* driver, + ImageHeader::StorageMode storage_mode) { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + std::vector<const DexFile*> class_path = class_linker->GetBootClassPath(); + + for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) { + { + ScopedObjectAccess soa(Thread::Current()); + // Inject in boot class path so that the compiler driver can see it. + class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get()); + } + class_path.push_back(dex_file.get()); + } + + // Enable write for dex2dex. + for (const DexFile* dex_file : class_path) { + dex_file_locations.push_back(dex_file->GetLocation()); + if (dex_file->IsReadOnly()) { + dex_file->EnableWrite(); + } + } + { + // Create a generic tmp file, to be the base of the .art and .oat temporary files. + ScratchFile location; + for (int i = 0; i < static_cast<int>(class_path.size()); ++i) { + std::string cur_location = + android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i); + image_locations.push_back(ScratchFile(cur_location)); + } + } + std::vector<std::string> image_filenames; + for (ScratchFile& file : image_locations) { + std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA)); + image_filenames.push_back(image_filename); + size_t pos = image_filename.rfind('/'); + CHECK_NE(pos, std::string::npos) << image_filename; + if (image_dir.empty()) { + image_dir = image_filename.substr(0, pos); + int mkdir_result = mkdir(image_dir.c_str(), 0700); + CHECK_EQ(0, mkdir_result) << image_dir; + } + image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str()))); + } + + std::vector<std::string> oat_filenames; + std::vector<std::string> vdex_filenames; + for (const std::string& image_filename : image_filenames) { + std::string oat_filename = ReplaceFileExtension(image_filename, "oat"); + oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str()))); + oat_filenames.push_back(oat_filename); + std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex"); + vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str()))); + vdex_filenames.push_back(vdex_filename); + } + + std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map; + std::vector<const char*> oat_filename_vector; + for (const std::string& file : oat_filenames) { + oat_filename_vector.push_back(file.c_str()); + } + std::vector<const char*> image_filename_vector; + for (const std::string& file : image_filenames) { + image_filename_vector.push_back(file.c_str()); + } + size_t image_idx = 0; + for (const DexFile* dex_file : class_path) { + dex_file_to_oat_index_map.emplace(dex_file, image_idx); + ++image_idx; + } + // TODO: compile_pic should be a test argument. 
+ std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver, + kRequestedImageBase, + /*compile_pic*/false, + /*compile_app_image*/false, + storage_mode, + oat_filename_vector, + dex_file_to_oat_index_map)); + { + { + jobject class_loader = nullptr; + TimingLogger timings("ImageTest::WriteRead", false, false); + TimingLogger::ScopedTiming t("CompileAll", &timings); + driver->SetDexFilesForOatFile(class_path); + driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings); + + t.NewTiming("WriteElf"); + SafeMap<std::string, std::string> key_value_store; + std::vector<const char*> dex_filename_vector; + for (size_t i = 0; i < class_path.size(); ++i) { + dex_filename_vector.push_back(""); + } + key_value_store.Put(OatHeader::kBootClassPathKey, + gc::space::ImageSpace::GetMultiImageBootClassPath( + dex_filename_vector, + oat_filename_vector, + image_filename_vector)); + + std::vector<std::unique_ptr<ElfWriter>> elf_writers; + std::vector<std::unique_ptr<OatWriter>> oat_writers; + for (ScratchFile& oat_file : oat_files) { + elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(), + driver->GetInstructionSetFeatures(), + &driver->GetCompilerOptions(), + oat_file.GetFile())); + elf_writers.back()->Start(); + oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true, + &timings, + /*profile_compilation_info*/nullptr)); + } + + std::vector<OutputStream*> rodata; + std::vector<std::unique_ptr<MemMap>> opened_dex_files_map; + std::vector<std::unique_ptr<const DexFile>> opened_dex_files; + // Now that we have finalized key_value_store_, start writing the oat file. + for (size_t i = 0, size = oat_writers.size(); i != size; ++i) { + const DexFile* dex_file = class_path[i]; + rodata.push_back(elf_writers[i]->StartRoData()); + ArrayRef<const uint8_t> raw_dex_file( + reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()), + dex_file->GetHeader().file_size_); + oat_writers[i]->AddRawDexFileSource(raw_dex_file, + dex_file->GetLocation().c_str(), + dex_file->GetLocationChecksum()); + + std::unique_ptr<MemMap> cur_opened_dex_files_map; + std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files; + bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles( + kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(), + rodata.back(), + driver->GetInstructionSet(), + driver->GetInstructionSetFeatures(), + &key_value_store, + /* verify */ false, // Dex files may be dex-to-dex-ed, don't verify. 
+ /* update_input_vdex */ false, + &cur_opened_dex_files_map, + &cur_opened_dex_files); + ASSERT_TRUE(dex_files_ok); + + if (cur_opened_dex_files_map != nullptr) { + opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map)); + for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) { + // dex_file_oat_index_map_.emplace(dex_file.get(), i); + opened_dex_files.push_back(std::move(cur_dex_file)); + } + } else { + ASSERT_TRUE(cur_opened_dex_files.empty()); + } + } + bool image_space_ok = writer->PrepareImageAddressSpace(); + ASSERT_TRUE(image_space_ok); + + if (kIsVdexEnabled) { + for (size_t i = 0, size = vdex_files.size(); i != size; ++i) { + std::unique_ptr<BufferedOutputStream> vdex_out( + MakeUnique<BufferedOutputStream>( + MakeUnique<FileOutputStream>(vdex_files[i].GetFile()))); + oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr); + oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get()); + } + } + + for (size_t i = 0, size = oat_files.size(); i != size; ++i) { + linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(), + driver->GetInstructionSetFeatures()); + OatWriter* const oat_writer = oat_writers[i].get(); + ElfWriter* const elf_writer = elf_writers[i].get(); + std::vector<const DexFile*> cur_dex_files(1u, class_path[i]); + oat_writer->Initialize(driver, writer.get(), cur_dex_files); + oat_writer->PrepareLayout(&patcher); + size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset(); + size_t text_size = oat_writer->GetOatSize() - rodata_size; + elf_writer->PrepareDynamicSection(rodata_size, + text_size, + oat_writer->GetBssSize(), + oat_writer->GetBssRootsOffset()); + + writer->UpdateOatFileLayout(i, + elf_writer->GetLoadedSize(), + oat_writer->GetOatDataOffset(), + oat_writer->GetOatSize()); + + bool rodata_ok = oat_writer->WriteRodata(rodata[i]); + ASSERT_TRUE(rodata_ok); + elf_writer->EndRoData(rodata[i]); + + OutputStream* text = elf_writer->StartText(); + bool text_ok = oat_writer->WriteCode(text); + ASSERT_TRUE(text_ok); + elf_writer->EndText(text); + + bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u); + ASSERT_TRUE(header_ok); + + writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader()); + + elf_writer->WriteDynamicSection(); + elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo()); + + bool success = elf_writer->End(); + ASSERT_TRUE(success); + } + } + + bool success_image = writer->Write(kInvalidFd, + image_filename_vector, + oat_filename_vector); + ASSERT_TRUE(success_image); + + for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) { + const char* oat_filename = oat_filenames[i].c_str(); + std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename)); + ASSERT_TRUE(oat_file != nullptr); + bool success_fixup = ElfWriter::Fixup(oat_file.get(), + writer->GetOatDataBegin(i)); + ASSERT_TRUE(success_fixup); + ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file " + << oat_filename; + } + } +} + +inline void ImageTest::Compile(ImageHeader::StorageMode storage_mode, + CompilationHelper& helper, + const std::string& extra_dex, + const std::initializer_list<std::string>& image_classes) { + for (const std::string& image_class : image_classes) { + image_classes_.insert(image_class); + } + CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); + // Set inline filter values. 
+ compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits); + image_classes_.clear(); + if (!extra_dex.empty()) { + helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str()); + } + helper.Compile(compiler_driver_.get(), storage_mode); + if (image_classes.begin() != image_classes.end()) { + // Make sure the class got initialized. + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + for (const std::string& image_class : image_classes) { + mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); + EXPECT_TRUE(klass != nullptr); + EXPECT_TRUE(klass->IsInitialized()); + } + } +} + +inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { + CompilationHelper helper; + Compile(storage_mode, /*out*/ helper); + std::vector<uint64_t> image_file_sizes; + for (ScratchFile& image_file : helper.image_files) { + std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str())); + ASSERT_TRUE(file.get() != nullptr); + ImageHeader image_header; + ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true); + ASSERT_TRUE(image_header.IsValid()); + const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap); + ASSERT_GE(bitmap_section.Offset(), sizeof(image_header)); + ASSERT_NE(0U, bitmap_section.Size()); + + gc::Heap* heap = Runtime::Current()->GetHeap(); + ASSERT_TRUE(heap->HaveContinuousSpaces()); + gc::space::ContinuousSpace* space = heap->GetNonMovingSpace(); + ASSERT_FALSE(space->IsImageSpace()); + ASSERT_TRUE(space != nullptr); + ASSERT_TRUE(space->IsMallocSpace()); + image_file_sizes.push_back(file->GetLength()); + } + + ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr); + std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses()); + + // Need to delete the compiler since it has worker threads which are attached to runtime. + compiler_driver_.reset(); + + // Tear down old runtime before making a new one, clearing out misc state. + + // Remove the reservation of the memory for use to load the image. + // Need to do this before we reset the runtime. + UnreserveImageSpace(); + + helper.extra_dex_files.clear(); + runtime_.reset(); + java_lang_dex_file_ = nullptr; + + MemMap::Init(); + + RuntimeOptions options; + std::string image("-Ximage:"); + image.append(helper.image_locations[0].GetFilename()); + options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr))); + // By default the compiler this creates will not include patch information. + options.push_back(std::make_pair("-Xnorelocate", nullptr)); + + if (!Runtime::Create(options, false)) { + LOG(FATAL) << "Failed to create runtime"; + return; + } + runtime_.reset(Runtime::Current()); + // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start, + // give it away now and then switch to a more managable ScopedObjectAccess. + Thread::Current()->TransitionFromRunnableToSuspended(kNative); + ScopedObjectAccess soa(Thread::Current()); + ASSERT_TRUE(runtime_.get() != nullptr); + class_linker_ = runtime_->GetClassLinker(); + + gc::Heap* heap = Runtime::Current()->GetHeap(); + ASSERT_TRUE(heap->HasBootImageSpace()); + ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace()); + + // We loaded the runtime with an explicit image, so it must exist. 
+ ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size()); + for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) { + std::unique_ptr<const DexFile> dex( + LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str())); + ASSERT_TRUE(dex != nullptr); + uint64_t image_file_size = image_file_sizes[i]; + gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i]; + ASSERT_TRUE(image_space != nullptr); + if (storage_mode == ImageHeader::kStorageModeUncompressed) { + // Uncompressed, image should be smaller than file. + ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size); + } else if (image_file_size > 16 * KB) { + // Compressed, file should be smaller than image. Not really valid for small images. + ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize()); + } + + image_space->VerifyImageAllocations(); + uint8_t* image_begin = image_space->Begin(); + uint8_t* image_end = image_space->End(); + if (i == 0) { + // This check is only valid for image 0. + CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin)); + } + for (size_t j = 0; j < dex->NumClassDefs(); ++j) { + const DexFile::ClassDef& class_def = dex->GetClassDef(j); + const char* descriptor = dex->GetClassDescriptor(class_def); + mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor); + EXPECT_TRUE(klass != nullptr) << descriptor; + if (image_classes.find(descriptor) == image_classes.end()) { + EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end || + reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor; + } else { + // Image classes should be located inside the image. + EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor; + EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor; + } + EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false))); + } + } +} + + +} // namespace art + +#endif // ART_COMPILER_IMAGE_TEST_H_ diff --git a/compiler/compiled_class.h b/compiler/image_write_read_test.cc index 06ce946942..32c0b06766 100644 --- a/compiler/compiled_class.h +++ b/compiler/image_write_read_test.cc @@ -14,27 +14,20 @@ * limitations under the License. 
*/ -#ifndef ART_COMPILER_COMPILED_CLASS_H_ -#define ART_COMPILER_COMPILED_CLASS_H_ - -#include "mirror/class.h" +#include "image_test.h" namespace art { -class CompiledClass { - public: - explicit CompiledClass(mirror::Class::Status status) : status_(status) {} - ~CompiledClass() {} - mirror::Class::Status GetStatus() const { - return status_; - } - void SetStatus(mirror::Class::Status status) { - status_ = status; - } - private: - mirror::Class::Status status_; -}; +TEST_F(ImageTest, WriteReadUncompressed) { + TestWriteRead(ImageHeader::kStorageModeUncompressed); +} -} // namespace art +TEST_F(ImageTest, WriteReadLZ4) { + TestWriteRead(ImageHeader::kStorageModeLZ4); +} -#endif // ART_COMPILER_COMPILED_CLASS_H_ +TEST_F(ImageTest, WriteReadLZ4HC) { + TestWriteRead(ImageHeader::kStorageModeLZ4HC); +} + +} // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 39113c8143..2283b39773 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -33,8 +33,8 @@ #include "base/enums.h" #include "base/length_prefixed_array.h" #include "base/macros.h" +#include "class_table.h" #include "driver/compiler_driver.h" -#include "gc/space/space.h" #include "image.h" #include "lock_word.h" #include "mem_map.h" @@ -47,6 +47,10 @@ namespace art { namespace gc { +namespace accounting { +template <size_t kAlignment> class SpaceBitmap; +typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap; +} // namespace accounting namespace space { class ImageSpace; } // namespace space @@ -57,7 +61,6 @@ class ClassLoader; } // namespace mirror class ClassLoaderVisitor; -class ClassTable; class ImtConflictTable; static constexpr int kInvalidFd = -1; diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h index 63c23cb074..c8a0119667 100644 --- a/compiler/intrinsics_list.h +++ b/compiler/intrinsics_list.h @@ -28,6 +28,9 @@ // The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to GVN Integer.valueOf // (kNoSideEffects), and it is also OK to remove it if it's unused. +// Note: Thread.interrupted is marked with kAllSideEffects due to the lack of finer grain +// side effects representation. + #define INTRINSICS_LIST(V) \ V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \ V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \ @@ -154,7 +157,8 @@ V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \ V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \ V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \ - V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;") + V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;") \ + V(ThreadInterrupted, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/Thread;", "interrupted", "()Z") #endif // ART_COMPILER_INTRINSICS_LIST_H_ #undef ART_COMPILER_INTRINSICS_LIST_H_ // #define is only for lint. 
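Editorial note on the intrinsics_list.h hunk above: INTRINSICS_LIST(V) is an X-macro, so the new Thread.interrupted entry only needs the single V(...) row added there, and every consumer that expands the list picks it up automatically. A minimal sketch of such a consumer follows, assuming a hypothetical enum; the names IntrinsicId and DECLARE_INTRINSIC_ID are illustrative and not taken from the ART sources, only the eight-argument shape of V(...) comes from the list itself.

#include "intrinsics_list.h"

// Illustrative consumer of the X-macro above. IntrinsicId and
// DECLARE_INTRINSIC_ID are made-up names; each V(...) row expands to one
// enumerator, so kThreadInterrupted appears here without further edits.
enum class IntrinsicId {
  kNone,
#define DECLARE_INTRINSIC_ID(Name, InvokeType, NeedsEnv, SideEffects, Throws, ClassDesc, MethodName, Signature) \
  k ## Name,
  INTRINSICS_LIST(DECLARE_INTRINSIC_ID)
#undef DECLARE_INTRINSIC_ID
};

A single row therefore carries everything a consumer needs for the new intrinsic: dispatch kind, environment requirement, the kAllSideEffects/kNoThrow flags noted in the comment above, and the class/method/signature strings used for recognition.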
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index a146274ddb..66135414f7 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -105,7 +105,7 @@ JitCompiler::JitCompiler() { /* implicit_null_checks */ true, /* implicit_so_checks */ true, /* implicit_suspend_checks */ false, - /* pic */ true, // TODO: Support non-PIC in optimizing. + /* pic */ false, /* verbose_methods */ nullptr, /* init_failure_output */ nullptr, /* abort_on_hard_verifier_failure */ false, @@ -117,6 +117,9 @@ JitCompiler::JitCompiler() { for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) { compiler_options_->ParseCompilerOption(argument, Usage); } + // JIT is never PIC, no matter what the runtime compiler options specify. + compiler_options_->SetNonPic(); + const InstructionSet instruction_set = kRuntimeISA; for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) { VLOG(compiler) << "JIT compiler option " << option; diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index f55d5a6fb8..c1ac230d43 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -249,7 +249,7 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, // All remaining method call patches will be handled by this thunk. DCHECK(!unprocessed_method_call_patches_.empty()); DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(), - MaxPositiveDisplacement(ThunkType::kMethodCall)); + MaxPositiveDisplacement(GetMethodCallKey())); unprocessed_method_call_patches_.clear(); } } @@ -271,8 +271,8 @@ uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_ DCHECK(method_call_thunk_ != nullptr); // Unsigned arithmetic with its well-defined overflow behavior is just fine here. uint32_t displacement = target_offset - patch_offset; - uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); - uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); + uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); + uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); // NOTE: With unsigned arithmetic we do mean to use && rather than || below. if (displacement > max_positive_displacement && displacement < -max_negative_displacement) { // Unwritten thunks have higher offsets, check if it's within range. 
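Editorial note on the CalculateMethodCallDisplacement hunk above: it leans on well-defined unsigned overflow. A backward branch makes target_offset - patch_offset wrap to a large unsigned value, so the pair of comparisons joined by && (deliberately not ||, as the NOTE in the hunk says) rejects exactly the displacements outside [-max_negative, +max_positive]. A self-contained sketch of that range check, assuming illustrative limits and a made-up helper name:

#include <cassert>
#include <cstdint>

// Mirrors the unsigned-arithmetic range check from the hunk above; the
// function name and the limits used in main() are illustrative, not ART's.
bool InBranchRange(uint32_t patch_offset, uint32_t target_offset,
                   uint32_t max_positive, uint32_t max_negative) {
  uint32_t displacement = target_offset - patch_offset;  // Wraps for backward branches.
  // A displacement is out of range only if it is beyond the forward limit AND
  // below the wrapped-around backward limit, hence && rather than ||.
  return !(displacement > max_positive && displacement < -max_negative);
}

int main() {
  constexpr uint32_t kMaxPos = (1u << 24) - 2u;  // Roughly a Thumb2 BL forward reach.
  constexpr uint32_t kMaxNeg = (1u << 24);       // Roughly a Thumb2 BL backward reach.
  assert(InBranchRange(0x1000, 0x1008, kMaxPos, kMaxNeg));                // Small forward hop.
  assert(InBranchRange(0x1008, 0x1000, kMaxPos, kMaxNeg));                // Small backward hop (wraps).
  assert(!InBranchRange(0x1000, 0x1000 + (1u << 25), kMaxPos, kMaxNeg));  // Too far forward.
  assert(!InBranchRange(0x1000 + (1u << 25), 0x1000, kMaxPos, kMaxNeg));  // Too far backward.
  return 0;
}

The new kMaxBcond* limits introduced later in this diff for Baker read barrier branches feed into the same MaxPositiveDisplacement/MaxNegativeDisplacement check, now keyed by ThunkKey instead of ThunkType.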
@@ -299,29 +299,40 @@ uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint3 if (data.HasWrittenOffset()) { uint32_t offset = data.LastWrittenOffset(); DCHECK_LT(offset, patch_offset); - if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) { + if (patch_offset - offset <= MaxNegativeDisplacement(key)) { return offset; } } DCHECK(data.HasPendingOffset()); uint32_t offset = data.GetPendingOffset(); DCHECK_GT(offset, patch_offset); - DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType())); + DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key)); return offset; } +ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() { + return ThunkKey(ThunkType::kMethodCall); +} + +ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey( + const LinkerPatch& patch) { + DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); + return ThunkKey(ThunkType::kBakerReadBarrier, + patch.GetBakerCustomValue1(), + patch.GetBakerCustomValue2()); +} + void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset) { for (const LinkerPatch& patch : compiled_method->GetPatches()) { uint32_t patch_offset = code_offset + patch.LiteralOffset(); - ThunkType key_type = static_cast<ThunkType>(-1); + ThunkKey key(static_cast<ThunkType>(-1)); ThunkData* old_data = nullptr; if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - key_type = ThunkType::kMethodCall; + key = GetMethodCallKey(); unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); if (method_call_thunk_ == nullptr) { - ThunkKey key(key_type, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); method_call_thunk_ = &it->second; AddUnreservedThunk(method_call_thunk_); @@ -329,11 +340,10 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho old_data = method_call_thunk_; } } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - ThunkKey key = GetBakerReadBarrierKey(patch); - key_type = key.GetType(); + key = GetBakerThunkKey(patch); auto lb = thunks_.lower_bound(key); if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); AddUnreservedThunk(&it->second); } else { @@ -342,16 +352,16 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho } if (old_data != nullptr) { // Shared path where an old thunk may need an update. - DCHECK(key_type != static_cast<ThunkType>(-1)); + DCHECK(key.GetType() != static_cast<ThunkType>(-1)); DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); if (old_data->NeedsNextThunk()) { // Patches for a method are ordered by literal offset, so if we still need to place // this thunk for a previous patch, that thunk shall be in range for this patch. 
- DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type)); + DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key)); } else { if (!old_data->HasReservedOffset() || - patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) { - old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type)); + patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) { + old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key)); AddUnreservedThunk(old_data); } } @@ -385,8 +395,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, DCHECK(!unreserved_thunks_.empty()); DCHECK(!unprocessed_method_call_patches_.empty()); DCHECK(method_call_thunk_ != nullptr); - uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); - uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); + uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); + uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); // Process as many patches as possible, stop only on unresolved targets or calls too far back. while (!unprocessed_method_call_patches_.empty()) { MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); @@ -439,8 +449,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, } inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, - ThunkType type) { - return RoundDown(patch_offset + MaxPositiveDisplacement(type), + const ThunkKey& key) { + return RoundDown(patch_offset + MaxPositiveDisplacement(key), GetInstructionSetAlignment(instruction_set_)); } diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h index 2cb1b6c535..5197ce2549 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -42,59 +42,30 @@ class ArmBaseRelativePatcher : public RelativePatcher { enum class ThunkType { kMethodCall, // Method call thunk. - kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset. - kBakerReadBarrierRoot, // Baker read barrier, GC root load. - }; - - struct BakerReadBarrierOffsetParams { - uint32_t holder_reg; // Holder object for reading lock word. - uint32_t base_reg; // Base register, different from holder for large offset. - // If base differs from holder, it should be a pre-defined - // register to limit the number of thunks we need to emit. - // The offset is retrieved using introspection. - }; - - struct BakerReadBarrierRootParams { - uint32_t root_reg; // The register holding the GC root. - uint32_t dummy; - }; - - struct RawThunkParams { - uint32_t first; - uint32_t second; - }; - - union ThunkParams { - RawThunkParams raw_params; - BakerReadBarrierOffsetParams offset_params; - BakerReadBarrierRootParams root_params; + kBakerReadBarrier, // Baker read barrier. 
}; class ThunkKey { public: - ThunkKey(ThunkType type, ThunkParams params) : type_(type), params_(params) { } + explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) + : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { } ThunkType GetType() const { return type_; } - BakerReadBarrierOffsetParams GetOffsetParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierField); - return params_.offset_params; + uint32_t GetCustomValue1() const { + return custom_value1_; } - BakerReadBarrierRootParams GetRootParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierRoot); - return params_.root_params; - } - - RawThunkParams GetRawParams() const { - return params_.raw_params; + uint32_t GetCustomValue2() const { + return custom_value2_; } private: ThunkType type_; - ThunkParams params_; + uint32_t custom_value1_; + uint32_t custom_value2_; }; class ThunkKeyCompare { @@ -103,13 +74,16 @@ class ArmBaseRelativePatcher : public RelativePatcher { if (lhs.GetType() != rhs.GetType()) { return lhs.GetType() < rhs.GetType(); } - if (lhs.GetRawParams().first != rhs.GetRawParams().first) { - return lhs.GetRawParams().first < rhs.GetRawParams().first; + if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) { + return lhs.GetCustomValue1() < rhs.GetCustomValue1(); } - return lhs.GetRawParams().second < rhs.GetRawParams().second; + return lhs.GetCustomValue2() < rhs.GetCustomValue2(); } }; + static ThunkKey GetMethodCallKey(); + static ThunkKey GetBakerThunkKey(const LinkerPatch& patch); + uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, MethodReference method_ref, @@ -119,10 +93,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, uint32_t target_offset); - virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0; virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; - virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0; - virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0; + virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0; + virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0; private: class ThunkData; @@ -132,7 +105,7 @@ class ArmBaseRelativePatcher : public RelativePatcher { void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); - uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type); + uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key); RelativePatcherTargetProvider* const provider_; const InstructionSet instruction_set_; diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 1a5d79ce70..aa5a9457b2 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -16,9 +16,16 @@ #include "linker/arm/relative_patcher_thumb2.h" +#include "arch/arm/asm_support_arm.h" #include "art_method.h" +#include "base/bit_utils.h" #include "compiled_method.h" -#include "utils/arm/assembler_thumb2.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "lock_word.h" +#include "mirror/object.h" +#include "mirror/array-inl.h" +#include "read_barrier.h" +#include "utils/arm/assembler_arm_vixl.h" namespace art { namespace linker { @@ -32,6 +39,12 @@ static constexpr int32_t kPcDisplacement = 4; constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; 
constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; +// Maximum positive and negative displacement for a conditional branch measured from the patch +// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured +// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.) +constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement; +constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement; + Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) : ArmBaseRelativePatcher(provider, kThumb2) { } @@ -84,29 +97,259 @@ void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, SetInsn32(code, literal_offset, insn); } -void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; +void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) { + DCHECK_ALIGNED(patch_offset, 2u); + uint32_t literal_offset = patch.LiteralOffset(); + DCHECK_ALIGNED(literal_offset, 2u); + DCHECK_LT(literal_offset, code->size()); + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched) + ThunkKey key = GetBakerThunkKey(patch); + if (kIsDebugBuild) { + const uint32_t encoded_data = key.GetCustomValue1(); + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code->size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(code, literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. 
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } + break; + } + default: + LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + UNREACHABLE(); + } + } + uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); + DCHECK_ALIGNED(target_offset, 4u); + uint32_t disp = target_offset - (patch_offset + kPcDisplacement); + DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed. + insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S". + ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 13, "J1". + ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 11, "J2". + ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-25, "imm6". + ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-12 to 0-11, "imm11". + SetInsn32(code, literal_offset, insn); } -ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; - UNREACHABLE(); +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, + vixl::aarch32::Register base_reg, + vixl::aarch32::MemOperand& lock_word, + vixl::aarch32::Label* slow_path, + int32_t raw_ldr_offset) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target */ false); + __ Add(lr, lr, raw_ldr_offset); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. 
+} + +void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, + uint32_t encoded_data) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) + // that performs further checks on the reference and marks it if needed. + Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have + // emitted an explicit null check before the load. Otherwise, we need to null-check + // the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch32::Label throw_npe; + if (holder_reg.Is(base_reg)) { + __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); + } + vixl::aarch32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + Register ep_reg(kBakerCcEntrypointRegister); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(ep_reg); // Jump to the entrypoint. + if (holder_reg.Is(base_reg)) { + // Add null check slow path. The stack map is at the address pointed to by LR. 
+ __ Bind(&throw_npe); + int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value(); + __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset)); + __ Bx(ip); + } + break; + } + case BakerReadBarrierKind::kArray: { + Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl::aarch32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + Register ep_reg(kBakerCcEntrypointRegister); // Insert ip to the entrypoint address to create + __ Bfi(ep_reg, ip, 3, 6); // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to ip0. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl::aarch32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, ¬_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(¬_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to art_quick_read_barrier_mark_introspection_gc_roots. + Register ep_reg(kBakerCcEntrypointRegister); + int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) + ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } } std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { - DCHECK(key.GetType() == ThunkType::kMethodCall); - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. ArenaPool pool; ArenaAllocator arena(&pool); - arm::Thumb2Assembler assembler(&arena); - assembler.LoadFromOffset( - arm::kLoadWord, arm::PC, arm::R0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - assembler.bkpt(0); + arm::ArmVIXLAssembler assembler(&arena); + + switch (key.GetType()) { + case ThunkType::kMethodCall: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl::aarch32::pc, + vixl::aarch32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + __ Bkpt(0); + break; + case ThunkType::kBakerReadBarrier: + CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1()); + break; + } + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); @@ -114,19 +357,29 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { return thunk_code; } -uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallPositiveDisplacement; +#undef __ + +uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { + switch (key.GetType()) { + case ThunkType::kMethodCall: + return kMaxMethodCallPositiveDisplacement; + case ThunkType::kBakerReadBarrier: + return kMaxBcondPositiveDisplacement; + } } -uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallNegativeDisplacement; +uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { + switch (key.GetType()) { + case ThunkType::kMethodCall: + return kMaxMethodCallNegativeDisplacement; + case ThunkType::kBakerReadBarrier: + return kMaxBcondNegativeDisplacement; + } } void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); uint8_t* addr = &(*code)[offset]; addr[0] = (value >> 16) & 0xff; addr[1] = (value >> 24) & 0xff; @@ -136,7 +389,7 @@ void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offse uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); const uint8_t* addr = &code[offset]; return (static_cast<uint32_t>(addr[0]) << 16) + @@ -151,5 +404,18 @@ uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { return GetInsn32(ArrayRef<const uint8_t>(*code), offset); } +uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, 
uint32_t offset) { + DCHECK_LE(offset + 2u, code.size()); + DCHECK_ALIGNED(offset, 2u); + const uint8_t* addr = &code[offset]; + return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); +} + +template <typename Vector> +uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { + static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); + return GetInsn16(ArrayRef<const uint8_t>(*code), offset); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index ab37802d0f..183e5e6c96 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -17,13 +17,57 @@ #ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#include "arch/arm/registers_arm.h" +#include "base/array_ref.h" +#include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { + +namespace arm { +class ArmVIXLAssembler; +} // namespace arm + namespace linker { class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: + static constexpr uint32_t kBakerCcEntrypointRegister = 4u; + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { + CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); + } + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); void PatchCall(std::vector<uint8_t>* code, @@ -39,18 +83,58 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; - uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; + uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kLast = kGcRoot + }; + + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast = kNarrow + }; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBitsForRegister = 4u; + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, + kBitsForBakerReadBarrierWidth>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; + } + + void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); template <typename Vector> static uint32_t GetInsn32(Vector* code, uint32_t offset); + static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Vector> + static uint32_t GetInsn16(Vector* code, uint32_t offset); + friend class Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index f08270d934..52e27afcf5 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -14,8 +14,12 @@ * limitations under the License. 
*/ +#include "base/casts.h" #include "linker/relative_patcher_test.h" #include "linker/arm/relative_patcher_thumb2.h" +#include "lock_word.h" +#include "mirror/array-inl.h" +#include "mirror/object.h" #include "oat_quick_method_header.h" namespace art { @@ -34,13 +38,102 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; static const uint32_t kPcInsnOffset; + // The PC in Thumb mode is 4 bytes after the instruction location. + static constexpr uint32_t kPcAdjustment = 4u; + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + static constexpr uint32_t kBlPlus0 = 0xf000f800u; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; - static constexpr uint32_t kBlMinusMax = 0xf400d000; + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; + static constexpr uint32_t kBlMinusMax = 0xf400d000u; + + // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. + static constexpr uint32_t kBneWPlus0 = 0xf0408000u; + + // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. + static constexpr uint32_t kLdrInsn = 0x6800u; + + // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. + static constexpr uint32_t kLdrWInsn = 0xf8d00000u; + + // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. + static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; + + // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. + static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; + + // NOP instructions. 
+ static constexpr uint32_t kNopInsn = 0xbf00u; + static constexpr uint32_t kNopWInsn = 0xf3af8000u; + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + if (IsUint<16>(insn)) { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } else { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn >> 16), + static_cast<uint8_t>(insn >> 24), + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + } + + void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { + InsertInsn(code, code->size(), insn); + } + + std::vector<uint8_t> GenNops(size_t num_nops) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u); + for (size_t i = 0; i != num_nops; ++i) { + PushBackInsn(&result, kNopInsn); + } + return result; + } + + std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { + std::vector<uint8_t> raw_code; + size_t number_of_16_bit_insns = + std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); + raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); + for (uint32_t insn : insns) { + PushBackInsn(&raw_code, insn); + } + return raw_code; + } + + uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { + if (!IsAligned<2u>(bne_offset)) { + LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; + return 0xffffffffu; // Fails code diff later. + } + if (!IsAligned<2u>(target_offset)) { + LOG(ERROR) << "Unaligned target_offset: " << target_offset; + return 0xffffffffu; // Fails code diff later. + } + uint32_t diff = target_offset - bne_offset - kPcAdjustment; + DCHECK_ALIGNED(diff, 2u); + if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { + LOG(ERROR) << "Target out of range: " << diff; + return 0xffffffffu; // Fails code diff later. 
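A minimal self-contained sketch (not part of the patch; the helper name PackBneT3 and the static_asserts are ours, and it is meant for namespace scope) that spot-checks the B<cond>.W encoding T3 bit packing in the return statement just below against the two extreme displacements used later by the BakerOffsetThunkInTheMiddle tests, (1 << 20) - 2 forward and -(1 << 20) backward:

constexpr uint32_t PackBneT3(uint32_t diff) {
  return ((diff >> 1) & 0x7ffu)           // imm11
      | (((diff >> 12) & 0x3fu) << 16)    // imm6
      | (((diff >> 18) & 1u) << 13)       // J1
      | (((diff >> 19) & 1u) << 11)       // J2
      | (((diff >> 20) & 1u) << 26);      // S
}
static_assert(PackBneT3((1u << 20) - 2u) == 0x003f2fffu, "max forward displacement");
static_assert(PackBneT3(static_cast<uint32_t>(-(1 << 20))) == 0x04000000u,
              "max backward displacement");

These are exactly the literals expected by those tests, kBneWPlus0 | 0x003f2fff and kBneWPlus0 | 0x04000000.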
+ } + return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 + | (((diff >> 12) & 0x3fu) << 16) // imm6 + | (((diff >> 18) & 1) << 13) // J1 + | (((diff >> 19) & 1) << 11) // J2 + | (((diff >> 20) & 1) << 26); // S + } bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, const ArrayRef<const LinkerPatch>& method1_patches, @@ -95,9 +188,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { } std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key( - ArmBaseRelativePatcher::ThunkType::kMethodCall, - ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -125,19 +216,57 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> result; result.reserve(num_nops * 2u + 4u); for (size_t i = 0; i != num_nops; ++i) { - result.push_back(0x00); - result.push_back(0xbf); + PushBackInsn(&result, kNopInsn); } - result.push_back(static_cast<uint8_t>(bl >> 16)); - result.push_back(static_cast<uint8_t>(bl >> 24)); - result.push_back(static_cast<uint8_t>(bl)); - result.push_back(static_cast<uint8_t>(bl >> 8)); + PushBackInsn(&result, bl); return result; } - void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); + void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); void TestStringReference(uint32_t string_offset); void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); + + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + uint32_t GetOutputInsn32(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 4u); + return (static_cast<uint32_t>(output_[offset]) << 16) | + (static_cast<uint32_t>(output_[offset + 1]) << 24) | + (static_cast<uint32_t>(output_[offset + 2]) << 0) | + (static_cast<uint32_t>(output_[offset + 3]) << 8); + } + + uint16_t GetOutputInsn16(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 2u); + return (static_cast<uint32_t>(output_[offset]) << 0) | + (static_cast<uint32_t>(output_[offset + 1]) << 8); + } + + void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); + void 
TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -161,21 +290,22 @@ const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCod kUnpatchedPcRelativeRawCode); const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; -void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { - dex_cache_arrays_begin_ = dex_cache_arrays_begin; - LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset), - LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset), +void Thumb2RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, + uint32_t string_entry_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); + bss_begin_ = bss_begin; + const LinkerPatch patches[] = { + LinkerPatch::StringBssEntryPatch(0u, nullptr, kPcInsnOffset, kStringIndex), + LinkerPatch::StringBssEntryPatch(4u, nullptr, kPcInsnOffset, kStringIndex), }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), - dex_cache_arrays_begin_ + element_offset); + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); } void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { constexpr uint32_t kStringIndex = 1u; string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), }; @@ -214,7 +344,7 @@ void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker } TEST_F(Thumb2RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -227,11 +357,11 @@ TEST_F(Thumb2RelativePatcherTest, CallSelf) { } TEST_F(Thumb2RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { + const LinkerPatch method2_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); @@ -254,7 +384,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOther) { } TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -274,7 +404,7 @@ TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), }; @@ -303,7 +433,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. 
ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -325,7 +455,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -347,7 +477,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -382,7 +512,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -405,23 +535,23 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { EXPECT_TRUE(CheckThunk(thunk_offset)); } -TEST_F(Thumb2RelativePatcherTest, DexCacheReference1) { - TestDexCacheReference(0x00ff0000u, 0x00fcu); +TEST_F(Thumb2RelativePatcherTest, StringBssEntry1) { + TestStringBssEntry(0x00ff0000u, 0x00fcu); ASSERT_LT(GetMethodOffset(1u), 0xfcu); } -TEST_F(Thumb2RelativePatcherTest, DexCacheReference2) { - TestDexCacheReference(0x02ff0000u, 0x05fcu); +TEST_F(Thumb2RelativePatcherTest, StringBssEntry2) { + TestStringBssEntry(0x02ff0000u, 0x05fcu); ASSERT_LT(GetMethodOffset(1u), 0xfcu); } -TEST_F(Thumb2RelativePatcherTest, DexCacheReference3) { - TestDexCacheReference(0x08ff0000u, 0x08fcu); +TEST_F(Thumb2RelativePatcherTest, StringBssEntry3) { + TestStringBssEntry(0x08ff0000u, 0x08fcu); ASSERT_LT(GetMethodOffset(1u), 0xfcu); } -TEST_F(Thumb2RelativePatcherTest, DexCacheReference4) { - TestDexCacheReference(0xd0ff0000u, 0x60fcu); +TEST_F(Thumb2RelativePatcherTest, StringBssEntry4) { + TestStringBssEntry(0xd0ff0000u, 0x60fcu); ASSERT_LT(GetMethodOffset(1u), 0xfcu); } @@ -445,5 +575,710 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) { ASSERT_LT(GetMethodOffset(1u), 0xfcu); } +void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
+ }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 4 * KB); + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ false); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. + if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). 
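// Note on the fake_dependency value checked a few lines below (our reading of the expected
// encoding, not a comment from the patch): the instruction is ADD base_reg, base_reg, IP, LSR #32.
// An LSR by 32 of a 32-bit register yields zero, so base_reg is left unchanged, but the ADD makes
// the subsequent reference load depend on the lock word value in IP. That is the "fake dependency"
// mentioned in the thunk comments above: it prevents load-load reordering without a memory barrier.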
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 32u); + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ true); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. 
+ if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetWide##offset##_##ref_reg) { \ + TestBakerFieldWide(offset, ref_reg); \ + } + +TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) +TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) + +#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetNarrow##offset##_##ref_reg) { \ + TestBakerFieldNarrow(offset, ref_reg); \ + } + +TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { + // One thunk in the middle with maximum distance branches to it from both sides. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
+ constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kLiteralOffset2 = 4; + static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch + // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
+ size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); + size_t filler2_size = + 1 * MB - (kLiteralOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { + // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction + // earlier, so the thunk is emitted before the filler. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 4u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + Link(); + + const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { + // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded + // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. 
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kReachableFromOffset2 = 4; + constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; + static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // If not for the extra NOP, this would allow reaching the thunk from the BNE + // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take + // PC adjustment into account. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). + size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); + size_t filler2_size = + 1 * MB - (kReachableFromOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. 
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_last = + BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = + RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; + return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded from the correct address + // before the base_reg which points to the array data. 
+ ASSERT_GE(output_.size() - thunk_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + ASSERT_GT(offset, -256); + const uint32_t load_lock_word = + kLdrNegativeOffset | + (-offset & 0xffu) | + (base_reg << 16) | + (/* IP */ 12 << 12); + EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency. + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 4u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check uses the correct register, i.e. root_reg. + if (root_reg < 8) { + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + } else { + ASSERT_GE(output_.size() - thunk_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + } + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + // Not appplicable to high registers. + }; + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 2u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { + // Test 1MiB of patches to the same thunk to stress-test different large offsets. + // (The low bits are not that important but the location of the high bits is easy to get wrong.) + std::vector<uint8_t> code; + code.reserve(1 * MB); + const size_t num_patches = 1 * MB / 8u; + std::vector<LinkerPatch> patches; + patches.reserve(num_patches); + const uint32_t ldr = + kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&code, ldr); + PushBackInsn(&code, kBneWPlus0); + patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); + } + ASSERT_EQ(1 * MB, code.size()); + ASSERT_EQ(num_patches, patches.size()); + AddCompiledMethod(MethodRef(1u), + ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + + // The thunk is right after the method code. + DCHECK_ALIGNED(1 * MB, kArmAlignment); + std::vector<uint8_t> expected_code; + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&expected_code, ldr); + PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB)); + patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); + } + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { + // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` + // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily + // hold when we're reserving thunks of different sizes. This test exposes the situation + // by using Baker thunks and a method call thunk. + + // Add a method call patch that can reach to method 1 offset + 16MiB. 
+ uint32_t method_idx = 0u; + constexpr size_t kMethodCallLiteralOffset = 2u; + constexpr uint32_t kMissingMethodIdx = 2u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); + const LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), + }; + ArrayRef<const uint8_t> code1(raw_code1); + ++method_idx; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); + + // Skip kMissingMethodIdx. + ++method_idx; + ASSERT_EQ(kMissingMethodIdx, method_idx); + // Add a method with the right size that the method code for the next one starts 1MiB + // after code for method 1. + size_t filler_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> filler_code = GenNops(filler_size / 2u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB + // before the currently scheduled MaxNextOffset() for the method call thunk. + for (uint32_t i = 0; i != 14; ++i) { + filler_size = 1 * MB - sizeof(OatQuickMethodHeader); + filler_code = GenNops(filler_size / 2u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + } + + // Add 2 Baker GC root patches to the last method, one that would allow the thunk at + // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the + // second that needs it kArmAlignment after that. Given the size of the GC root thunk + // is more than the space required by the method call thunk plus kArmAlignment, + // this pushes the first GC root thunk's pending MaxNextOffset() before the method call + // thunk's pending MaxNextOffset() which needs to be adjusted. + ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment, + CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size()); + static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8"); + constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment; + constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment; + // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`. + const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12); + const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12); + const std::vector<uint8_t> last_method_raw_code = RawCode({ + kNopInsn, // Padding before first GC root read barrier. + ldr1, kBneWPlus0, // First GC root LDR with read barrier. + ldr2, kBneWPlus0, // Second GC root LDR with read barrier. + }); + uint32_t encoded_data1 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false); + uint32_t encoded_data2 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false); + const LinkerPatch last_method_patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), + ArrayRef<const uint8_t>(last_method_raw_code), + ArrayRef<const LinkerPatch>(last_method_patches)); + + // The main purpose of the test is to check that Link() does not cause a crash. 
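// Layout arithmetic for the assertion after Link() below (a sketch, not from the patch): the first
// filler makes the following method start 1 MiB after method 1, and each of the next 14 one-MiB
// fillers adds another 1 MiB, so the final method with the Baker patches starts 1 + 14 = 15 MiB
// after method 1, which is what ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u))
// verifies.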
+ Link(); + + ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 551c73b2a4..117684a66b 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -29,6 +29,7 @@ #include "mirror/array-inl.h" #include "oat.h" #include "oat_quick_method_header.h" +#include "read_barrier.h" #include "utils/arm64/assembler_arm64.h" namespace art { @@ -53,13 +54,11 @@ constexpr uint32_t kAdrpThunkSize = 8u; inline bool IsAdrpPatch(const LinkerPatch& patch) { switch (patch.GetType()) { - case LinkerPatch::Type::kMethod: case LinkerPatch::Type::kCall: case LinkerPatch::Type::kCallRelative: - case LinkerPatch::Type::kType: - case LinkerPatch::Type::kString: case LinkerPatch::Type::kBakerReadBarrierBranch: return false; + case LinkerPatch::Type::kMethodRelative: case LinkerPatch::Type::kTypeRelative: case LinkerPatch::Type::kTypeBssEntry: case LinkerPatch::Type::kStringRelative: @@ -251,11 +250,13 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, if ((insn & 0xfffffc00) == 0x91000000) { // ADD immediate, 64-bit with imm12 == 0 (unset). if (!kEmitCompilerReadBarrier) { - DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || + DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || + patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType(); } else { // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry. - DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || + DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || + patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative || patch.GetType() == LinkerPatch::Type::kStringBssEntry || patch.GetType() == LinkerPatch::Type::kTypeBssEntry) << patch.GetType(); @@ -304,27 +305,42 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod DCHECK_LT(literal_offset, code->size()); uint32_t insn = GetInsn(code, literal_offset); DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched) - ThunkKey key = GetBakerReadBarrierKey(patch); + ThunkKey key = GetBakerThunkKey(patch); if (kIsDebugBuild) { + const uint32_t encoded_data = key.GetCustomValue1(); + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); // Check that the next instruction matches the expected LDR. - switch (key.GetType()) { - case ThunkType::kBakerReadBarrierField: { + switch (kind) { + case BakerReadBarrierKind::kField: { DCHECK_GE(code->size() - literal_offset, 8u); uint32_t next_insn = GetInsn(code, literal_offset + 4u); // LDR (immediate) with correct base_reg. CheckValidReg(next_insn & 0x1fu); // Check destination register. 
- CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5)); + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); break; } - case ThunkType::kBakerReadBarrierRoot: { + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { DCHECK_GE(literal_offset, 4u); uint32_t prev_insn = GetInsn(code, literal_offset - 4u); // LDR (immediate) with correct root_reg. - CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg); + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); break; } default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); UNREACHABLE(); } } @@ -336,40 +352,6 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod SetInsn(code, literal_offset, insn); } -ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch) { - DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); - uint32_t value = patch.GetBakerCustomValue1(); - BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value); - ThunkParams params; - switch (type) { - case BakerReadBarrierKind::kField: - params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.offset_params.base_reg); - params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); - CheckValidReg(params.offset_params.holder_reg); - break; - case BakerReadBarrierKind::kGcRoot: - params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.root_params.root_reg); - params.root_params.dummy = 0u; - DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); - break; - default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type); - UNREACHABLE(); - } - constexpr uint8_t kTypeTranslationOffset = 1u; - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierField), - "Thunk type translation check."); - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot), - "Thunk type translation check."); - return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset), - params); -} - #define __ assembler.GetVIXLAssembler()-> static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, @@ -394,33 +376,27 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which 
would be more expensive). - __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32)); + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); __ Br(lr); // And return back to the function. // Note: The fake dependency is unnecessary for the slow path. } -std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { +void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, + uint32_t encoded_data) { using namespace vixl::aarch64; // NOLINT(build/namespaces) - ArenaPool pool; - ArenaAllocator arena(&pool); - arm64::Arm64Assembler assembler(&arena); - - switch (key.GetType()) { - case ThunkType::kMethodCall: { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); - break; - } - case ThunkType::kBakerReadBarrierField: { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { // Check if the holder is gray and, if not, add fake dependency to the base register // and return to the LDR instruction to load the reference. Otherwise, use introspection // to load the reference and call the entrypoint (in IP1) that performs further checks // on the reference and marks it if needed. - auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg); - auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg); + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + auto holder_reg = + Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); // If base_reg differs from holder_reg, the offset was too large and we must have @@ -444,17 +420,43 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { // Add null check slow path. The stack map is at the address pointed to by LR. __ Bind(&throw_npe); int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset)); + __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset)); __ Br(ip0); } break; } - case ThunkType::kBakerReadBarrierRoot: { + case BakerReadBarrierKind::kArray: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffset(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). 
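      // In LDR (register) the index register Rm occupies bits 20-16 and bit 21 is always set, so
      // ip0 now holds Rm + 32; the Bfi below inserts it at bit 3 of the entrypoint address,
      // selecting an 8-byte switch-case slot keyed on the index register.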
+ __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip0, base_reg); // Move the base register to ip0. + __ Br(ip1); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { // Check if the reference needs to be marked and if so (i.e. not null, not marked yet // and it does not have a forwarding address), call the correct introspection entrypoint; // otherwise return the reference (or the extracted forwarding address). // There is no gray bit check for GC roots. - auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg); + auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); vixl::aarch64::Label return_label, not_marked, forwarding_address; @@ -477,6 +479,30 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { __ Br(lr); break; } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } +} + +std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { + ArenaPool pool; + ArenaAllocator arena(&pool); + arm64::Arm64Assembler assembler(&arena); + + switch (key.GetType()) { + case ThunkType::kMethodCall: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + break; + } + case ThunkType::kBakerReadBarrier: { + CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1()); + break; + } } // Ensure we emit the literal pool. @@ -489,22 +515,20 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { #undef __ -uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - switch (type) { +uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { + switch (key.GetType()) { case ThunkType::kMethodCall: return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrierField: - case ThunkType::kBakerReadBarrierRoot: + case ThunkType::kBakerReadBarrier: return kMaxBcondPositiveDisplacement; } } -uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - switch (type) { +uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { + switch (key.GetType()) { case ThunkType::kMethodCall: return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrierField: - case ThunkType::kBakerReadBarrierRoot: + case ThunkType::kBakerReadBarrier: return kMaxBcondNegativeDisplacement; } } @@ -543,10 +567,10 @@ bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, return false; } - // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP - // for an ADD immediate, check for that as well. We generalize a bit to include - // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores - // the result to a different register. + // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result + // of the ADRP for an ADD immediate, check for that as well. 
We generalize a bit + // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination + // or stores the result to a different register. if ((next_insn & 0x1f000000) == 0x11000000 && ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) { return false; diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index 7887cea5e6..b00dd081b6 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -19,19 +19,19 @@ #include "base/array_ref.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { + +namespace arm64 { +class Arm64Assembler; +} // namespace arm64 + namespace linker { class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { public: - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kGcRoot, // GC root load. - kLast - }; - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { CheckValidReg(base_reg); CheckValidReg(holder_reg); @@ -40,6 +40,13 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BakerReadBarrierSecondRegField::Encode(holder_reg); } + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { CheckValidReg(root_reg); return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | @@ -68,14 +75,20 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; - - ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; - uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; + uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
+ kLast = kGcRoot + }; + static constexpr size_t kBitsForBakerReadBarrierKind = MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); static constexpr size_t kBitsForRegister = 5u; @@ -87,9 +100,11 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u); + DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; } + void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index b4d35ab2a7..5d02d449fe 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -18,6 +18,7 @@ #include "linker/relative_patcher_test.h" #include "linker/arm64/relative_patcher_arm64.h" #include "lock_word.h" +#include "mirror/array-inl.h" #include "mirror/object.h" #include "oat_quick_method_header.h" @@ -46,9 +47,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kBlPlusMax = 0x95ffffffu; static constexpr uint32_t kBlMinusMax = 0x96000000u; - // LDR immediate, unsigned offset. + // LDR immediate, 32-bit, unsigned offset. static constexpr uint32_t kLdrWInsn = 0xb9400000u; + // LDR register, 32-bit, LSL #2. + static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u; + + // LDUR, 32-bit. + static constexpr uint32_t kLdurWInsn = 0xb8400000u; + // ADD/ADDS/SUB/SUBS immediate, 64-bit. static constexpr uint32_t kAddXInsn = 0x91000000u; static constexpr uint32_t kAddsXInsn = 0xb1000000u; @@ -68,7 +75,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. 
- static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011; + static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u; void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { CHECK_LE(pos, code->size()); @@ -160,9 +167,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key( - ArmBaseRelativePatcher::ThunkType::kMethodCall, - ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -188,7 +193,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> GenNops(size_t num_nops) { std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 4u); + result.reserve(num_nops * 4u); for (size_t i = 0; i != num_nops; ++i) { PushBackInsn(&result, kNopInsn); } @@ -228,7 +233,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } else { LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn; } - uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) + uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, // We take the sign bit from the disp, limiting disp to +- 2GiB. @@ -244,12 +249,14 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn); } - void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) { - dex_cache_arrays_begin_ = dex_cache_arrays_begin; + void TestNopsAdrpLdr(size_t num_nops, uint32_t bss_begin, uint32_t string_entry_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); + bss_begin_ = bss_begin; auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. 
const LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), - LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset), + LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), + LinkerPatch::StringBssEntryPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), }; AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), @@ -257,7 +264,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { Link(); uint32_t method1_offset = GetMethodOffset(1u); - uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + uint32_t target_offset = bss_begin_ + string_entry_offset; auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } @@ -288,14 +295,16 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, - uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { - dex_cache_arrays_begin_ = dex_cache_arrays_begin; + uint32_t bss_begin, + uint32_t string_entry_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); + bss_begin_ = bss_begin; auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. InsertInsn(&code, num_nops * 4u + 4u, insn2); const LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), - LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset), + LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), + LinkerPatch::StringBssEntryPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), }; AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), @@ -371,15 +380,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk, - uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { + uint32_t bss_begin, + uint32_t string_entry_offset) { uint32_t method1_offset = kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); ASSERT_LT(method1_offset, adrp_offset); CHECK_ALIGNED(adrp_offset, 4u); uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); - uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, bss_begin, string_entry_offset); + uint32_t target_offset = bss_begin_ + string_entry_offset; if (has_thunk) { TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn); } else { @@ -390,33 +399,33 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk, - uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { - TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + uint32_t bss_begin, + uint32_t string_entry_offset) { + TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, bss_begin, string_entry_offset); } void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp, uint32_t adrp_offset, bool has_thunk, - uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { + uint32_t bss_begin, + uint32_t string_entry_offset) { ASSERT_LT(pcrel_disp, 
0x100000); ASSERT_GE(pcrel_disp, -0x100000); ASSERT_EQ(pcrel_disp & 0x3, 0); uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); } void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units, uint32_t adrp_offset, bool has_thunk, - uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { + uint32_t bss_begin, + uint32_t string_entry_offset) { ASSERT_LT(sprel_disp_in_load_units, 0x1000u); uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); } void TestAdrpInsn2Add(uint32_t insn2, @@ -466,17 +475,22 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } uint32_t GetOutputInsn(uint32_t offset) { @@ -488,7 +502,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { (static_cast<uint32_t>(output_[offset + 3]) << 24); } - void TestBakerField(uint32_t offset, uint32_t root_reg); + void TestBakerField(uint32_t offset, uint32_t ref_reg); }; const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { @@ -716,19 +730,19 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { EXPECT_TRUE(CheckThunk(thunk_offset)); } -TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) { +TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry1) { TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); } -TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) { +TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry2) { TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); } -TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) { +TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry3) { TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); } 
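For reference, a minimal standalone sketch of the ADRP instruction that GenNopsAndAdrpAndUse assembles above (EncodeAdrpX0 is a hypothetical helper, not part of this patch); it simply mirrors the immlo/immhi shifts used by the test:

#include <cstdint>

// ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64), as assembled by the test above.
uint32_t EncodeAdrpX0(uint32_t disp) {
  uint32_t adrp = 0x90000000u;
  adrp |= (disp & 0x3000u) << (29 - 12);      // immlo = ((disp & 0x3000u) >> 12) is at bit 29.
  adrp |= (disp & 0xffffc000u) >> (14 - 5);   // immhi = (disp >> 14) is at bit 5.
  adrp |= (disp & 0x80000000u) >> (31 - 23);  // Sign bit of disp, limiting disp to +-2GiB.
  return adrp;
}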
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) { +TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry4) { TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); } @@ -753,7 +767,7 @@ TEST_F(Arm64RelativePatcherTestDefault, StringReference4) { test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) #define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## Ldur ## disp) { \ + TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \ } @@ -761,7 +775,7 @@ TEST_F(Arm64RelativePatcherTestDefault, StringReference4) { TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238) #define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference ## adrp_offset ## Ldur ## disp) { \ + TEST_F(Arm64RelativePatcherTestDenver64, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \ } @@ -769,7 +783,7 @@ TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238) // LDR <Wt>, <label> is always aligned. We should never have to use a fixup. #define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \ + TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WPcRel ## disp) { \ TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ } @@ -777,7 +791,7 @@ TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238) // LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. #define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \ + TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XPcRel ## disp) { \ bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ @@ -787,14 +801,14 @@ TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238) // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. 
#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \ + TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WSpRel ## disp) { \ TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ } TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4) #define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \ + TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XSpRel ## disp) { \ TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ } @@ -885,7 +899,7 @@ TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) -void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) { +void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { uint32_t valid_regs[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. @@ -899,7 +913,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg uint32_t method_idx = 0u; for (uint32_t base_reg : valid_regs) { for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); ASSERT_EQ(kMethodCodeSize, raw_code.size()); ArrayRef<const uint8_t> code(raw_code); @@ -922,7 +936,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg ++method_idx; uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); ASSERT_EQ(kMethodCodeSize, expected_code.size()); ASSERT_TRUE( @@ -942,7 +956,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg if (holder_reg == base_reg) { // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. ASSERT_GE(output_.size() - gray_check_offset, 4u); - ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); gray_check_offset +=4u; } // Verify that the lock word for gray bit check is loaded from the holder address. @@ -955,12 +969,12 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg /* ip0 */ 16; EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); // Verify the gray bit check. - const uint32_t check_gray_bit_witout_offset = - 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f); + const uint32_t check_gray_bit_without_offset = + 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu); // Verify the fake dependency. 
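      // The thunk's ADD base_reg, base_reg, ip0, LSR #32 adds zero (the 32-bit lock word load
      // zero-extends into ip0), so base_reg keeps its value but gains an address dependency on
      // the lock word, ordering the reference load after the gray bit check without a memory
      // barrier.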
const uint32_t fake_dependency = - 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32 + 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 (/* ip0 */ 16 << 16) | // Xm = ip0 (base_reg << 5) | // Xn = base_reg base_reg; // Xd = base_reg @@ -973,19 +987,19 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg } } -#define TEST_BAKER_FIELD(offset, root_reg) \ +#define TEST_BAKER_FIELD(offset, ref_reg) \ TEST_F(Arm64RelativePatcherTestDefault, \ - BakerOffset##offset##_##root_reg) { \ - TestBakerField(offset, root_reg); \ + BakerOffset##offset##_##ref_reg) { \ + TestBakerField(offset, ref_reg); \ } -TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15) -TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29) +TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15) +TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29) TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 4; const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1046,7 +1060,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 0; const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1076,7 +1090,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 4; const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1132,7 +1146,88 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFr ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); } -TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { +TEST_F(Arm64RelativePatcherTestDefault, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + // LR and SP/ZR are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 
3u : 2u; + return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg; + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); + uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); + const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded from the correct address + // before the base_reg which points to the array data. + static constexpr size_t kGrayCheckInsns = 5; + ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + const uint32_t load_lock_word = + kLdurWInsn | + ((offset & 0x1ffu) << 12) | + (base_reg << 5) | + /* ip0 */ 16; + EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset)); + // Verify the gray bit check. + const uint32_t check_gray_bit_without_offset = + 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu); + // Verify the fake dependency. + const uint32_t fake_dependency = + 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 + (/* ip0 */ 16 << 16) | // Xm = ip0 + (base_reg << 5) | // Xn = base_reg + base_reg; // Xd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); + } +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) { uint32_t valid_regs[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. @@ -1180,7 +1275,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. 
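    // CBZ Wt, +imm19 encodes as 0x34000000 | (imm19 << 5) | Rt, so masking with 0xff00001f keeps
    // the opcode and Rt while ignoring the branch offset.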
ASSERT_GE(output_.size() - thunk_offset, 4u); - ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); // Do not check the rest of the implementation. // The next thunk follows on the next aligned offset. diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc index 474eb73e08..63ad8a58d5 100644 --- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc +++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc @@ -37,7 +37,7 @@ class Mips32r6RelativePatcherTest : public RelativePatcherTest { } void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); + void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); void TestStringReference(uint32_t string_offset); }; @@ -69,14 +69,15 @@ void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Link EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } -void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { - dex_cache_arrays_begin_ = dex_cache_arrays_begin; +void Mips32r6RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, + uint32_t string_entry_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); + bss_begin_ = bss_begin; LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset) + LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex) }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), - dex_cache_arrays_begin_ + element_offset); + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); } void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) { @@ -88,8 +89,8 @@ void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) { CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); } -TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) { - TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234); +TEST_F(Mips32r6RelativePatcherTest, StringBssEntry) { + TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); } TEST_F(Mips32r6RelativePatcherTest, StringReference) { diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc index b0d1294cf4..961b31266f 100644 --- a/compiler/linker/mips/relative_patcher_mips_test.cc +++ b/compiler/linker/mips/relative_patcher_mips_test.cc @@ -20,10 +20,6 @@ namespace art { namespace linker { -// We'll maximize the range of a single load instruction for dex cache array accesses -// by aligning offset -32768 with the offset of the first used element. 
-static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; - class MipsRelativePatcherTest : public RelativePatcherTest { public: MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {} @@ -41,7 +37,7 @@ class MipsRelativePatcherTest : public RelativePatcherTest { } void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); + void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); void TestStringReference(uint32_t string_offset); }; @@ -65,9 +61,7 @@ void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPa ASSERT_TRUE(result.first); uint32_t diff = target_offset - (result.second + kAnchorOffset); - if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) { - diff += kDexCacheArrayLwOffset; - } + CHECK_NE(patches[0].GetType(), LinkerPatch::Type::kDexCacheArray); diff += (diff & 0x8000) << 1; // Account for sign extension in addiu. const uint8_t expected_code[] = { @@ -79,14 +73,15 @@ void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPa EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } -void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { - dex_cache_arrays_begin_ = dex_cache_arrays_begin; +void MipsRelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, + uint32_t string_entry_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); + bss_begin_ = bss_begin; LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset) + LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex) }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), - dex_cache_arrays_begin_ + element_offset); + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); } void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) { @@ -98,8 +93,8 @@ void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) { CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); } -TEST_F(MipsRelativePatcherTest, DexCacheReference) { - TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234); +TEST_F(MipsRelativePatcherTest, StringBssEntry) { + TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); } TEST_F(MipsRelativePatcherTest, StringReference) { diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc index c3170584e4..9c9e24a96e 100644 --- a/compiler/linker/mips64/relative_patcher_mips64_test.cc +++ b/compiler/linker/mips64/relative_patcher_mips64_test.cc @@ -39,7 +39,7 @@ class Mips64RelativePatcherTest : public RelativePatcherTest { } void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); + void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); void TestStringReference(uint32_t string_offset); }; @@ -76,18 +76,19 @@ void Mips64RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } -void 
Mips64RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, - uint32_t element_offset) { - dex_cache_arrays_begin_ = dex_cache_arrays_begin; +void Mips64RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, + uint32_t string_entry_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); + bss_begin_ = bss_begin; LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset) + LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex) }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), - dex_cache_arrays_begin_ + element_offset); + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); } -TEST_F(Mips64RelativePatcherTest, DexCacheReference) { - TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234); +TEST_F(Mips64RelativePatcherTest, StringBssEntry) { + TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); } TEST_F(Mips64RelativePatcherTest, CallOther) { diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h index 247b29017e..bdc1ee1d27 100644 --- a/compiler/linker/multi_oat_relative_patcher.h +++ b/compiler/linker/multi_oat_relative_patcher.h @@ -102,7 +102,7 @@ class MultiOatRelativePatcher FINAL { relative_patcher_->PatchCall(code, literal_offset, patch_offset, target_offset); } - // Wrapper around RelativePatcher::PatchDexCacheReference(), doing offset adjustment. + // Wrapper around RelativePatcher::PatchPcRelativeReference(), doing offset adjustment. void PatchPcRelativeReference(std::vector<uint8_t>* code, const LinkerPatch& patch, uint32_t patch_offset, diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc index 951588a857..615b2b97be 100644 --- a/compiler/linker/multi_oat_relative_patcher_test.cc +++ b/compiler/linker/multi_oat_relative_patcher_test.cc @@ -282,7 +282,7 @@ TEST_F(MultiOatRelativePatcherTest, Patch) { uint32_t method2_patch_offset = 0x7654u; uint32_t method2_target_offset = 0xccccu; LinkerPatch method2_patch = - LinkerPatch::DexCacheArrayPatch(method2_literal_offset, nullptr, 0u, 1234u); + LinkerPatch::StringBssEntryPatch(method2_literal_offset, nullptr, 0u, 1u); patcher_.PatchPcRelativeReference( &code, method2_patch, method2_patch_offset, method2_target_offset); DCHECK_EQ(method2_literal_offset, mock_->last_literal_offset_); diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index d9a87a0cfd..bff68080c6 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -31,6 +31,7 @@ #include "method_reference.h" #include "oat.h" #include "oat_quick_method_header.h" +#include "string_reference.h" #include "vector_output_stream.h" namespace art { @@ -61,7 +62,7 @@ class RelativePatcherTest : public testing::Test { features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), method_offset_map_(), patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), - dex_cache_arrays_begin_(0u), + bss_begin_(0u), compiled_method_refs_(), compiled_methods_(), patched_code_(), @@ -157,8 +158,9 @@ class RelativePatcherTest : public testing::Test { result.first ? 
result.second : kTrampolineOffset + compiled_method->CodeDelta(); patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), offset + patch.LiteralOffset(), target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kDexCacheArray) { - uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset(); + } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { + uint32_t target_offset = + bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); patcher_->PatchPcRelativeReference(&patched_code_, patch, offset + patch.LiteralOffset(), @@ -276,7 +278,7 @@ class RelativePatcherTest : public testing::Test { std::unique_ptr<const InstructionSetFeatures> features_; MethodOffsetMap method_offset_map_; std::unique_ptr<RelativePatcher> patcher_; - uint32_t dex_cache_arrays_begin_; + uint32_t bss_begin_; SafeMap<uint32_t, uint32_t> string_index_to_offset_map_; std::vector<MethodReference> compiled_method_refs_; std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_; diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc index 2a44b7990e..0bd9de8e15 100644 --- a/compiler/linker/x86/relative_patcher_x86_test.cc +++ b/compiler/linker/x86/relative_patcher_x86_test.cc @@ -107,9 +107,11 @@ TEST_F(X86RelativePatcherTest, CallTrampoline) { EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } -TEST_F(X86RelativePatcherTest, DexCacheReference) { - dex_cache_arrays_begin_ = 0x12345678; - constexpr size_t kElementOffset = 0x1234; +TEST_F(X86RelativePatcherTest, StringBssEntry) { + bss_begin_ = 0x12345678; + constexpr size_t kStringEntryOffset = 0x1234; + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); static const uint8_t raw_code[] = { 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 0x5b, // pop ebx @@ -118,15 +120,14 @@ TEST_F(X86RelativePatcherTest, DexCacheReference) { constexpr uint32_t anchor_offset = 5u; // After call +0. 
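  // call +0 is 5 bytes (0xe8 plus a 4-byte displacement), so the return address popped into ebx
  // is method start + 5; the PC-relative displacement patched below is computed against that
  // anchor.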
ArrayRef<const uint8_t> code(raw_code); LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset), + LinkerPatch::StringBssEntryPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), }; AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); Link(); auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); ASSERT_TRUE(result.first); - uint32_t diff = - dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset); + uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + anchor_offset); static const uint8_t expected_code[] = { 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 0x5b, // pop ebx diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc index 2b46453255..6d6bb40fb4 100644 --- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc +++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc @@ -127,19 +127,20 @@ TEST_F(X86_64RelativePatcherTest, CallTrampoline) { EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } -TEST_F(X86_64RelativePatcherTest, DexCacheReference) { - dex_cache_arrays_begin_ = 0x12345678; - constexpr size_t kElementOffset = 0x1234; +TEST_F(X86_64RelativePatcherTest, StringBssEntry) { + bss_begin_ = 0x12345678; + constexpr size_t kStringEntryOffset = 0x1234; + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset), + LinkerPatch::StringBssEntryPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kStringIndex), }; AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches)); Link(); auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); ASSERT_TRUE(result.first); - uint32_t diff = - dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size()); + uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + kDexCacheLoadCode.size()); static const uint8_t expected_code[] = { 0x8b, 0x05, static_cast<uint8_t>(diff), diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 6b5387ae19..fed2d34cdb 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -28,13 +28,12 @@ #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker.h" -#include "compiled_class.h" #include "compiled_method.h" #include "debug/method_debug_info.h" #include "dex/verification_results.h" #include "dex_file-inl.h" #include "dexlayout.h" -#include "driver/compiler_driver.h" +#include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "gc/space/image_space.h" #include "gc/space/space.h" @@ -712,17 +711,17 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor { bool EndClass() { ClassReference class_ref(dex_file_, class_def_index_); - CompiledClass* compiled_class = writer_->compiler_driver_->GetCompiledClass(class_ref); mirror::Class::Status status; - if (compiled_class != nullptr) { - status = compiled_class->GetStatus(); - } else if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) { - // The oat class status is used only for verification of resolved classes, - // so use kStatusErrorResolved whether the class was resolved or unresolved - // during compile-time verification. 
- status = mirror::Class::kStatusErrorResolved; - } else { - status = mirror::Class::kStatusNotReady; + bool found = writer_->compiler_driver_->GetCompiledClass(class_ref, &status); + if (!found) { + if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) { + // The oat class status is used only for verification of resolved classes, + // so use kStatusErrorResolved whether the class was resolved or unresolved + // during compile-time verification. + status = mirror::Class::kStatusErrorResolved; + } else { + status = mirror::Class::kStatusNotReady; + } } writer_->oat_classes_.emplace_back(offset_, @@ -1332,19 +1331,12 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { PatchCodeAddress(&patched_code_, literal_offset, target_offset); break; } - case LinkerPatch::Type::kMethod: { - ArtMethod* method = GetTargetMethod(patch); - PatchMethodAddress(&patched_code_, literal_offset, method); - break; - } - case LinkerPatch::Type::kString: { - mirror::String* string = GetTargetString(patch); - PatchObjectAddress(&patched_code_, literal_offset, string); - break; - } - case LinkerPatch::Type::kType: { - mirror::Class* type = GetTargetType(patch); - PatchObjectAddress(&patched_code_, literal_offset, type); + case LinkerPatch::Type::kMethodRelative: { + uint32_t target_offset = GetTargetMethodOffset(GetTargetMethod(patch)); + writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, + patch, + offset_ + literal_offset, + target_offset); break; } case LinkerPatch::Type::kBakerReadBarrierBranch: { @@ -1468,6 +1460,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } } + uint32_t GetTargetMethodOffset(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(writer_->HasBootImage()); + method = writer_->image_writer_->GetImageMethodAddress(method); + size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_); + uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index); + // TODO: Clean up offset types. The target offset must be treated as signed. + return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method) - oat_data_begin); + } + uint32_t GetTargetObjectOffset(mirror::Object* object) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(writer_->HasBootImage()); object = writer_->image_writer_->GetImageAddress(object); @@ -1497,34 +1498,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { data[3] = (address >> 24) & 0xffu; } - void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (writer_->HasBootImage()) { - method = writer_->image_writer_->GetImageMethodAddress(method); - } else if (kIsDebugBuild) { - // NOTE: We're using linker patches for app->boot references when the image can - // be relocated and therefore we need to emit .oat_patches. We're not using this - // for app->app references, so check that the method is an image method. - std::vector<gc::space::ImageSpace*> image_spaces = - Runtime::Current()->GetHeap()->GetBootImageSpaces(); - bool contains_method = false; - for (gc::space::ImageSpace* image_space : image_spaces) { - size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin(); - contains_method |= - image_space->GetImageHeader().GetMethodsSection().Contains(method_offset); - } - CHECK(contains_method); - } - // Note: We only patch targeting ArtMethods in image which is in the low 4gb. 
- uint32_t address = PointerToLowMemUInt32(method); - DCHECK_LE(offset + 4, code->size()); - uint8_t* data = &(*code)[offset]; - data[0] = address & 0xffu; - data[1] = (address >> 8) & 0xffu; - data[2] = (address >> 16) & 0xffu; - data[3] = (address >> 24) & 0xffu; - } - void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) REQUIRES_SHARED(Locks::mutator_lock_) { uint32_t address = target_offset; diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index e778f75551..66b70ade2e 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -31,7 +31,7 @@ #include "os.h" #include "safe_map.h" #include "string_reference.h" -#include "utils/type_reference.h" +#include "type_reference.h" namespace art { diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index 5e70a8284d..1e75f10ebe 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -310,16 +310,18 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // least one predecessor is not covered by the same TryItem as the try block. // We do not split each edge separately, but rather create one boundary block // that all predecessors are relinked to. This preserves loop headers (b/23895756). - for (auto entry : try_block_info) { - HBasicBlock* try_block = graph_->GetBlocks()[entry.first]; + for (const auto& entry : try_block_info) { + uint32_t block_id = entry.first; + const DexFile::TryItem* try_item = entry.second; + HBasicBlock* try_block = graph_->GetBlocks()[block_id]; for (HBasicBlock* predecessor : try_block->GetPredecessors()) { - if (GetTryItem(predecessor, try_block_info) != entry.second) { + if (GetTryItem(predecessor, try_block_info) != try_item) { // Found a predecessor not covered by the same TryItem. Insert entering // boundary block. HTryBoundary* try_entry = new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc()); try_block->CreateImmediateDominator()->AddInstruction(try_entry); - LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks); + LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks); break; } } @@ -327,8 +329,10 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // Do a second pass over the try blocks and insert exit TryBoundaries where // the successor is not in the same TryItem. - for (auto entry : try_block_info) { - HBasicBlock* try_block = graph_->GetBlocks()[entry.first]; + for (const auto& entry : try_block_info) { + uint32_t block_id = entry.first; + const DexFile::TryItem* try_item = entry.second; + HBasicBlock* try_block = graph_->GetBlocks()[block_id]; // NOTE: Do not use iterators because SplitEdge would invalidate them. for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) { HBasicBlock* successor = try_block->GetSuccessors()[i]; @@ -337,7 +341,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // covered by the same TryItem. Otherwise the previous pass would have // created a non-throwing boundary block. 
if (GetTryItem(successor, try_block_info) != nullptr) { - DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info)); + DCHECK_EQ(try_item, GetTryItem(successor, try_block_info)); continue; } @@ -345,7 +349,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { HTryBoundary* try_exit = new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc()); graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit); - LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks); + LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks); } } } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index ed630cda91..f3ecdf036a 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1734,8 +1734,8 @@ class BCEVisitor : public HGraphVisitor { */ void InsertPhiNodes() { // Scan all new deoptimization blocks. - for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) { - HBasicBlock* true_block = it1->second; + for (const auto& entry : taken_test_loop_) { + HBasicBlock* true_block = entry.second; HBasicBlock* new_preheader = true_block->GetSingleSuccessor(); // Scan all instructions in a new deoptimization block. for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) { diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index cb6e14b2bd..a949c33149 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public testing::Test { void RunBCE() { graph_->BuildDominatorTree(); - InstructionSimplifier(graph_, /* codegen */ nullptr).Run(); + InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5136d7d2b8..c918ee6687 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -58,7 +58,7 @@ #include "parallel_move_resolver.h" #include "ssa_liveness_analysis.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/assembler.h" namespace art { @@ -145,7 +145,7 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { } size_t CodeGenerator::GetCachePointerOffset(uint32_t index) { - auto pointer_size = InstructionSetPointerSize(GetInstructionSet()); + PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet()); return static_cast<size_t>(pointer_size) * index; } @@ -508,7 +508,7 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, Location runtime_type_index_location, Location runtime_return_location) { - DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); DCHECK_EQ(cls->InputCount(), 1u); LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary( cls, LocationSummary::kCallOnMainOnly); @@ -518,7 +518,7 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, } void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) { - DCHECK_EQ(cls->GetLoadKind(), 
HLoadClass::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); LocationSummary* locations = cls->GetLocations(); MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_); if (cls->NeedsAccessCheck()) { @@ -557,6 +557,9 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { } void CodeGenerator::AllocateLocations(HInstruction* instruction) { + for (HEnvironment* env = instruction->GetEnvironment(); env != nullptr; env = env->GetParent()) { + env->AllocateLocations(); + } instruction->Accept(GetLocationBuilder()); DCHECK(CheckTypeConsistency(instruction)); LocationSummary* locations = instruction->GetLocations(); @@ -1400,20 +1403,6 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); } -uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); - DCHECK(klass->IsInitialized()); - return klass->GetSlowPathFlagOffset().Uint32Value(); -} - -uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); - DCHECK(klass->IsInitialized()); - return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); -} - void CodeGenerator::EmitJitRoots(uint8_t* code, Handle<mirror::ObjectArray<mirror::Object>> roots, const uint8_t* roots_data) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ea463eeb62..c9ba5c3357 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -31,10 +31,11 @@ #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" +#include "stack.h" #include "stack_map_stream.h" #include "string_reference.h" +#include "type_reference.h" #include "utils/label.h" -#include "utils/type_reference.h" namespace art { @@ -541,7 +542,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case HLoadString::LoadKind::kBssEntry: DCHECK(load->NeedsEnvironment()); return LocationSummary::kCallOnSlowPath; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: DCHECK(load->NeedsEnvironment()); return LocationSummary::kCallOnMainOnly; case HLoadString::LoadKind::kJitTableAddress: @@ -572,9 +573,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void GenerateNop() = 0; - uint32_t GetReferenceSlowFlagOffset() const; - uint32_t GetReferenceDisableFlagOffset() const; - static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass); protected: @@ -842,7 +840,7 @@ class SlowPathGenerator { const uint32_t dex_pc = instruction->GetDexPc(); auto iter = slow_path_map_.find(dex_pc); if (iter != slow_path_map_.end()) { - auto candidates = iter->second; + const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second; for (const auto& it : candidates) { InstructionType* other_instruction = it.first; SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 419b2afe91..914ae177c4 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include 
"arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" @@ -25,6 +26,7 @@ #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_arm.h" +#include "linker/arm/relative_patcher_thumb2.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -60,10 +62,45 @@ static constexpr DRegister DTMP = D31; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle +// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. +// For the Baker read barrier implementation using link-generated thunks we need to split +// the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// The reserved entrypoint register for link-time generated thunks. +const Register kBakerCcEntrypointRegister = R4; + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() +static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) { + DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK_EQ(kBakerCcEntrypointRegister, + instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>()); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) { + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler())); + __ BindTrackedLabel(bne_label); + Label placeholder_label; + __ b(&placeholder_label, NE); // Placeholder, patched at link-time. + __ Bind(&placeholder_label); +} + +static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) { + return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u; +} + static constexpr int kRegListThreshold = 4; // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, @@ -824,7 +861,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. 
- // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -959,6 +996,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); @@ -1607,6 +1656,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) { } } +static int64_t AdjustConstantForCondition(int64_t value, + IfCondition* condition, + IfCondition* opposite) { + if (value == 1) { + if (*condition == kCondB) { + value = 0; + *condition = kCondEQ; + *opposite = kCondNE; + } else if (*condition == kCondAE) { + value = 0; + *condition = kCondNE; + *opposite = kCondEQ; + } + } else if (value == -1) { + if (*condition == kCondGT) { + value = 0; + *condition = kCondGE; + *opposite = kCondLT; + } else if (*condition == kCondLE) { + value = 0; + *condition = kCondLT; + *opposite = kCondGE; + } + } + + return value; +} + static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition, bool invert, CodeGeneratorARM* codegen) { @@ -1620,7 +1697,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond std::swap(cond, opposite); } - std::pair<Condition, Condition> ret; + std::pair<Condition, Condition> ret(EQ, NE); const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1628,7 +1705,38 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond const Register left_high = left.AsRegisterPairHigh<Register>(); const Register left_low = left.AsRegisterPairLow<Register>(); - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(), + &cond, + &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (cond) { + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + ret = std::make_pair(NE, EQ); + FALLTHROUGH_INTENDED; + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + __ orrs(IP, left_low, ShifterOperand(left_high)); + return ret; + case kCondLT: + case kCondGE: + __ cmp(left_high, ShifterOperand(0)); + return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); + // Trivially true or false. 
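A compact way to convince oneself of the rewrites done by AdjustConstantForCondition and of the single-ORRS zero test above (editor's sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// A 64-bit value held in two 32-bit halves is zero iff (lo | hi) == 0,
// which is what the single ORRS of the register pair computes.
static bool IsZero64(uint32_t lo, uint32_t hi) { return (lo | hi) == 0u; }

int main() {
  for (uint64_t x : {0ull, 1ull, 0xffffffffull, 0x100000000ull}) {
    assert((x < 1u) == (x == 0u));    // kCondB  against 1 -> kCondEQ against 0.
    assert((x >= 1u) == (x != 0u));   // kCondAE against 1 -> kCondNE against 0.
    int64_t s = static_cast<int64_t>(x);
    assert((s > -1) == (s >= 0));     // kCondGT against -1 -> kCondGE against 0.
    assert((s <= -1) == (s < 0));     // kCondLE against -1 -> kCondLT against 0.
    assert(IsZero64(static_cast<uint32_t>(x), static_cast<uint32_t>(x >> 32)) == (x == 0u));
  }
  return 0;
}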
+ case kCondB: + ret = std::make_pair(NE, EQ); + FALLTHROUGH_INTENDED; + case kCondAE: + __ cmp(left_low, ShifterOperand(left_low)); + return ret; + default: + break; + } + } switch (cond) { case kCondEQ: @@ -1788,10 +1896,14 @@ static std::pair<Condition, Condition> GenerateTest(HCondition* condition, static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); - const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + IfCondition c = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition( + Int64FromConstant(locations->InAt(1).GetConstant()), + &c, + &opposite); ShifterOperand so; if (c < kCondLT || c > kCondGE) { @@ -1799,9 +1911,11 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || - !IsUint<8>(Low32Bits(value))) { + // encoding can be used; 0 is always handled, no matter what registers are + // used by the first input. + if (value != 0 && + (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || + !IsUint<8>(Low32Bits(value)))) { return false; } } else if (c == kCondLE || c == kCondGT) { @@ -1828,6 +1942,329 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { return true; } +static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); + + const Register out = cond->GetLocations()->Out().AsRegister<Register>(); + const auto condition = GenerateTest(cond, false, codegen); + + __ mov(out, ShifterOperand(0), AL, kCcKeep); + + if (ArmAssembler::IsLowRegister(out)) { + __ it(condition.first); + __ mov(out, ShifterOperand(1), condition.first); + } else { + Label done_label; + Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + + __ b(final_label, condition.second); + __ LoadImmediate(out, 1); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } + } +} + +static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const Register out = locations->Out().AsRegister<Register>(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite); + int32_t value_high = -High32Bits(value); + int32_t value_low = -Low32Bits(value); + + // The output uses Location::kNoOutputOverlap. 
+ if (out == left_high) { + std::swap(left_low, left_high); + std::swap(value_low, value_high); + } + + __ AddConstant(out, left_low, value_low); + __ AddConstant(IP, left_high, value_high); + } else { + DCHECK(right.IsRegisterPair()); + __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>())); + __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>())); + } + + // Need to check after calling AdjustConstantForCondition(). + DCHECK(condition == kCondEQ || condition == kCondNE) << condition; + + if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { + __ orrs(out, out, ShifterOperand(IP)); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + } else { + __ orr(out, out, ShifterOperand(IP)); + codegen->GenerateConditionWithZero(condition, out, out, IP); + } +} + +static void GenerateLongComparesAndJumps(HCondition* cond, + Label* true_label, + Label* false_label, + CodeGeneratorARM* codegen) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. + break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ CmpConstant(left_high, val_high); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ CmpConstant(left_low, val_low); + } else { + Register right_high = right.AsRegisterPairHigh<Register>(); + Register right_low = right.AsRegisterPairLow<Register>(); + + __ cmp(left_high, ShifterOperand(right_high)); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ cmp(left_low, ShifterOperand(right_low)); + } + // The last comparison might be unsigned. 
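GenerateEqualLong above relies on the fact that two 64-bit values split into 32-bit halves are equal exactly when both per-half differences, taken modulo 2^32 so borrows do not matter, are zero; a single ORR of the two differences then tests that. A minimal check of the identity (editor's sketch):

#include <cassert>
#include <cstdint>

// Equality of two 64-bit values from their 32-bit halves, using wrap-around
// subtraction followed by an OR, as in GenerateEqualLong.
static bool Equals64(uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi) {
  return ((a_lo - b_lo) | (a_hi - b_hi)) == 0u;
}

int main() {
  assert(Equals64(0x1u, 0x2u, 0x1u, 0x2u));
  assert(!Equals64(0x0u, 0x1u, 0x0u, 0x2u));          // Differ in the high half only.
  assert(!Equals64(0xffffffffu, 0x0u, 0x0u, 0x1u));   // 0xffffffff != 0x100000000.
  return 0;
}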
+ // TODO: optimize cases where this is always true/false + __ b(true_label, final_condition); +} + +static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const Register out = locations->Out().AsRegister<Register>(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + // Comparisons against 0 are common enough to deserve special attention. + if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (ArmAssembler::IsLowRegister(out)) { + // We only care if both input registers are 0 or not. + __ orrs(out, + left.AsRegisterPairLow<Register>(), + ShifterOperand(left.AsRegisterPairHigh<Register>())); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + // We only care if both input registers are 0 or not. + __ orr(out, + left.AsRegisterPairLow<Register>(), + ShifterOperand(left.AsRegisterPairHigh<Register>())); + codegen->GenerateConditionWithZero(condition, out, out); + return; + case kCondLT: + case kCondGE: + // We only care about the sign bit. + FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>()); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. + (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) { + GenerateEqualLong(cond, codegen); + return; + } + + if (CanGenerateTest(cond, codegen->GetAssembler())) { + GenerateConditionGeneric(cond, codegen); + return; + } + + // Convert the jumps into the result. + Label done_label; + Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + Label true_label, false_label; + + GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); + + // False case: result = 0. + __ Bind(&false_label); + __ mov(out, ShifterOperand(0)); + __ b(final_label); + + // True case: result = 1. 
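The jump sequences above decide a 64-bit comparison with a signed compare on the high words and, only when those are equal, an unsigned compare on the low words. A small self-checking sketch of that decomposition (editor's illustration, not part of the patch):

#include <cassert>
#include <cstdint>

// Signed 64-bit "less than" decided from 32-bit halves: signed on the high half,
// unsigned on the low half when the high halves are equal.
static bool LessThan64(int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo) {
  if (a_hi != b_hi) return a_hi < b_hi;  // Signed compare of the high words.
  return a_lo < b_lo;                    // Unsigned compare of the low words.
}

int main() {
  for (int64_t a : {-2ll, -1ll, 0ll, 1ll, 0x1ffffffffll, INT64_MIN, INT64_MAX}) {
    for (int64_t b : {-2ll, -1ll, 0ll, 1ll, 0x1ffffffffll, INT64_MIN, INT64_MAX}) {
      bool expected = a < b;
      bool got = LessThan64(static_cast<int32_t>(static_cast<uint64_t>(a) >> 32),
                            static_cast<uint32_t>(a),
                            static_cast<int32_t>(static_cast<uint64_t>(b) >> 32),
                            static_cast<uint32_t>(b));
      assert(expected == got);
    }
  }
  return 0;
}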
+ __ Bind(&true_label); + __ mov(out, ShifterOperand(1)); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } +} + +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) { + const Primitive::Type type = cond->GetLeft()->GetType(); + + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + + if (type == Primitive::kPrimLong) { + GenerateConditionLong(cond, codegen); + return; + } + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + Register in = locations->InAt(0).AsRegister<Register>(); + const Register out = locations->Out().AsRegister<Register>(); + const Location right = cond->GetLocations()->InAt(1); + int64_t value; + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (ArmAssembler::IsLowRegister(out) && out == in) { + __ cmp(out, ShifterOperand(0)); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, in); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + ShifterOperand operand; + + if (right.IsConstant()) { + operand = ShifterOperand(value); + } else if (out == right.AsRegister<Register>()) { + // Avoid 32-bit instructions if possible. + operand = ShifterOperand(in); + in = right.AsRegister<Register>(); + } else { + operand = ShifterOperand(right.AsRegister<Register>()); + } + + if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { + __ subs(out, in, operand); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + } else { + __ sub(out, in, operand); + codegen->GenerateConditionWithZero(condition, out, out); + } + + return; + } + + GenerateConditionGeneric(cond, codegen); +} + static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -1960,13 +2397,11 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2433,89 +2868,6 @@ void 
LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, - Label* true_label, - Label* false_label) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - Register left_high = left.AsRegisterPairHigh<Register>(); - Register left_low = left.AsRegisterPairLow<Register>(); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ CmpConstant(left_high, val_high); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ CmpConstant(left_low, val_low); - } else { - Register right_high = right.AsRegisterPairHigh<Register>(); - Register right_low = right.AsRegisterPairLow<Register>(); - - __ cmp(left_high, ShifterOperand(right_high)); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ cmp(left_low, ShifterOperand(right_low)); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ b(true_label, final_condition); -} - void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, Label* true_target_in, Label* false_target_in) { @@ -2557,7 +2909,7 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target); + GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); if (false_target != &fallthrough_target) { __ b(false_target); @@ -2872,6 +3224,80 @@ void CodeGeneratorARM::GenerateNop() { __ nop(); } +// `temp` is an extra temporary register that is used for some conditions; +// callers may not specify it, in which case the method will use a scratch +// register instead. 
+void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition, + Register out, + Register in, + Register temp) { + switch (condition) { + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) { + temp = out; + } + + // Avoid 32-bit instructions if possible; note that `in` and `temp` must be + // different as well. + if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) { + // temp = - in; only 0 sets the carry flag. + __ rsbs(temp, in, ShifterOperand(0)); + + if (out == in) { + std::swap(in, temp); + } + + // out = - in + in + carry = carry + __ adc(out, temp, ShifterOperand(in)); + } else { + // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. + __ clz(out, in); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); + } + + break; + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + if (out == in) { + if (temp == kNoRegister || in == temp) { + temp = IP; + } + } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) { + temp = out; + } + + // temp = in - 1; only 0 does not set the carry flag. + __ subs(temp, in, ShifterOperand(1)); + // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry + __ sbc(out, in, ShifterOperand(temp)); + break; + case kCondGE: + __ mvn(out, ShifterOperand(in)); + in = out; + FALLTHROUGH_INTENDED; + case kCondLT: + // We only care about the sign bit. + __ Lsr(out, in, 31); + break; + case kCondAE: + // Trivially true. + __ mov(out, ShifterOperand(1)); + break; + case kCondB: + // Trivially false. + __ mov(out, ShifterOperand(0)); + break; + default: + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + void LocationsBuilderARM::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -2908,48 +3334,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { return; } - const Register out = cond->GetLocations()->Out().AsRegister<Register>(); - - if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) { - const auto condition = GenerateTest(cond, false, codegen_); + const Primitive::Type type = cond->GetLeft()->GetType(); - __ it(condition.first); - __ mov(out, ShifterOperand(1), condition.first); - __ it(condition.second); - __ mov(out, ShifterOperand(0), condition.second); + if (Primitive::IsFloatingPointType(type)) { + GenerateConditionGeneric(cond, codegen_); return; } - // Convert the jumps into the result. - Label done_label; - Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { - Label true_label, false_label; + const IfCondition condition = cond->GetCondition(); - GenerateLongComparesAndJumps(cond, &true_label, &false_label); + // A condition with only one boolean input, or two boolean inputs without being equality or + // inequality results from transformations done by the instruction simplifier, and is handled + // as a regular condition with integral inputs. 
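The arithmetic tricks in GenerateConditionWithZero are easy to verify in isolation; a small self-checking sketch (editor's illustration, not part of the patch):

#include <cassert>
#include <cstdint>

// Branchless identities used above:
//  * (x == 0) is clz(x) >> 5, since only x == 0 has 32 leading zeros; it is also the
//    ARM carry flag of RSBS (0 - x), i.e. "no borrow", which the ADC then materializes;
//  * (x != 0) is the carry flag of SUBS (x - 1);
//  * (x < 0) for signed x is just the sign bit, obtained with a logical shift by 31.
static uint32_t Clz32(uint32_t x) {
  uint32_t n = 0;
  for (uint32_t bit = 0x80000000u; bit != 0u && (x & bit) == 0u; bit >>= 1) ++n;
  return n;  // 32 when x == 0.
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu}) {
    uint32_t is_zero = (x == 0u) ? 1u : 0u;
    uint32_t is_nonzero = 1u - is_zero;
    assert((Clz32(x) >> 5) == is_zero);
    assert(((0u >= x) ? 1u : 0u) == is_zero);     // Carry of 0 - x ("no borrow").
    assert(((x >= 1u) ? 1u : 0u) == is_nonzero);  // Carry of x - 1 ("no borrow").
    assert((x >> 31) == ((static_cast<int32_t>(x) < 0) ? 1u : 0u));
  }
  return 0;
}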
+ if (type == Primitive::kPrimBoolean && + cond->GetRight()->GetType() == Primitive::kPrimBoolean && + (condition == kCondEQ || condition == kCondNE)) { + const LocationSummary* const locations = cond->GetLocations(); + Register left = locations->InAt(0).AsRegister<Register>(); + const Register out = locations->Out().AsRegister<Register>(); + const Location right_loc = locations->InAt(1); - // False case: result = 0. - __ Bind(&false_label); - __ LoadImmediate(out, 0); - __ b(final_label); + // The constant case is handled by the instruction simplifier. + DCHECK(!right_loc.IsConstant()); - // True case: result = 1. - __ Bind(&true_label); - __ LoadImmediate(out, 1); - } else { - DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + Register right = right_loc.AsRegister<Register>(); - const auto condition = GenerateTest(cond, false, codegen_); + // Avoid 32-bit instructions if possible. + if (out == right) { + std::swap(left, right); + } - __ mov(out, ShifterOperand(0), AL, kCcKeep); - __ b(final_label, condition.second); - __ LoadImmediate(out, 1); - } + __ eor(out, left, ShifterOperand(right)); - if (done_label.IsLinked()) { - __ Bind(&done_label); + if (condition == kCondEQ) { + __ eor(out, out, ShifterOperand(1)); + } + + return; } + + GenerateConditionIntegralOrNonPrimitive(cond, codegen_); } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -3082,6 +3508,15 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant // Will be generated at use site. } +void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5287,7 +5722,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5753,11 +6199,35 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. 
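The boolean special case above boils down to an XOR; a tiny self-checking sketch (editor's illustration):

#include <cassert>

// With both inputs known to be 0 or 1, (a != b) is a ^ b and (a == b) is (a ^ b) ^ 1,
// so no flags, IT blocks or branches are needed.
int main() {
  for (unsigned a : {0u, 1u}) {
    for (unsigned b : {0u, 1u}) {
      assert(((a ^ b) == 1u) == (a != b));
      assert((((a ^ b) ^ 1u) == 1u) == (a == b));
    }
  }
  return 0;
}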
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Also need a temporary for String compression feature. locations->AddTemp(Location::RequiresRegister()); } } @@ -5869,8 +6339,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. 
+ data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type); + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + locations->GetTemp(0), + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + } } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -6275,6 +6757,15 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* } } +void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; @@ -6645,21 +7136,15 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind( UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -6667,7 +7152,7 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind( void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -6707,13 +7192,20 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { // For non-Baker read barrier we have a temp-clobbering call. } } + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + (load_kind == HLoadClass::LoadKind::kReferrersClass && + !Runtime::Current()->UseJitCompilation())) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not // move. 
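An array element at a constant index sits at a fixed offset from the array object, which is why the code above can treat such a load like a field load and only needs the extra temporary once the offset crosses the "far" threshold. A minimal sketch (editor's illustration; kDataOffset is an assumed value, not the ART constant, and kMinFarOffset mirrors kReferenceLoadMinFarOffset from the patch):

#include <cassert>
#include <cstdint>

constexpr uint32_t kDataOffset = 12u;    // Assumed offset of element 0 in an object array.
constexpr uint32_t kRefSizeShift = 2u;   // log2(sizeof(HeapReference<mirror::Object>)).
constexpr uint32_t kMinFarOffset = 4u * 1024u;

// Effective offset of element `index`, as computed in the hunk above.
constexpr uint32_t ElementOffset(uint32_t index) {
  return kDataOffset + (index << kRefSizeShift);
}

int main() {
  assert(ElementOffset(5u) == kDataOffset + 5u * 4u);
  // Only very large constant indices push the offset past the "far" threshold
  // that requires the extra temporary.
  assert(ElementOffset(1020u) < kMinFarOffset);
  assert(ElementOffset(1021u) >= kMinFarOffset);
  return 0;
}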
void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -6740,13 +7232,6 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE read_barrier_option); break; } - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), - cls->GetTypeIndex())); - break; - } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -6792,7 +7277,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -6846,21 +7331,15 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -6870,7 +7349,7 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(R0)); } else { locations->SetOut(Location::RequiresRegister()); @@ -6886,6 +7365,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() // that the the kPrimNot result register is the same as the first argument register. locations->SetCustomSlowPathCallerSaves(caller_saves); + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } } else { // For non-Baker read barrier we have a temp-clobbering call. 
} @@ -6902,12 +7384,6 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S HLoadString::LoadKind load_kind = load->GetLoadKind(); switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), - load->GetStringIndex())); - return; // No dex cache slow path. - } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARM::PcRelativePatchInfo* labels = @@ -6960,7 +7436,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S } // TODO: Consider re-adding the compiler code to do string dex cache lookup again. - DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); @@ -7056,6 +7532,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); + } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -7929,48 +8408,96 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. 
+ // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: + + CheckLastTempIsBakerCcEntrypointRegister(instruction); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow); + Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. + Location temp = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
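The comments above describe the "null entrypoint doubles as the is-marking flag" pattern: a single load plus a null check replaces a separate GetIsGcMarking() load. A minimal sketch of that dispatch, with hypothetical names (editor's illustration, not ART code):

#include <cassert>

using MarkFn = void* (*)(void*);

struct ThreadSketch {
  MarkFn read_barrier_mark;  // nullptr <=> the GC is not marking.
};

static void* LoadGcRoot(const ThreadSketch& self, void* const* root_slot) {
  MarkFn mark = self.read_barrier_mark;  // temp = Thread::Current()->pReadBarrierMarkRegNN
  void* root = *root_slot;               // Original reference load.
  if (mark != nullptr) {                 // <=> Thread::Current()->GetIsGcMarking()
    root = mark(root);                   // Slow path: root = ReadBarrier::Mark(root).
  }
  return root;
}

static void* IdentityMark(void* ref) { return ref; }

int main() {
  int obj = 0;
  void* slot = &obj;
  assert(LoadGcRoot({nullptr}, &slot) == &obj);       // Not marking: fast path.
  assert(LoadGcRoot({IdentityMark}, &slot) == &obj);  // Marking: goes through Mark().
  return 0;
}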
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -7988,6 +8515,16 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct } } +void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } + } +} + void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -7997,6 +8534,76 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. 
+ // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register ref_reg = ref.AsRegister<Register>(); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); + Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = temp.AsRegister<Register>(); + DCHECK_NE(base, kBakerCcEntrypointRegister); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u)); + offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); + } + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow); + Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + EmitPlaceholderBne(this, bne_label); + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); + __ LoadFromOffset(kLoadWord, ref_reg, base, offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -8017,9 +8624,67 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. 
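The base-register adjustment above keeps the LDR displacement below kReferenceLoadMinFarOffset: the base is advanced by the aligned part of the offset and the load keeps only the remainder. A minimal sketch of that split (editor's illustration):

#include <cassert>
#include <cstdint>

constexpr uint32_t kMinFarOffset = 4u * 1024u;  // kReferenceLoadMinFarOffset in the patch.

struct SplitOffset {
  uint32_t base_adjustment;   // Added to the base register.
  uint32_t remaining_offset;  // Kept in the LDR displacement.
};

static SplitOffset Split(uint32_t offset) {
  if (offset < kMinFarOffset) {
    return {0u, offset};
  }
  return {offset & ~(kMinFarOffset - 1u), offset & (kMinFarOffset - 1u)};
}

int main() {
  SplitOffset s = Split(0x1234u);
  assert(s.base_adjustment == 0x1000u && s.remaining_offset == 0x234u);
  assert(s.base_adjustment + s.remaining_offset == 0x1234u);
  s = Split(0x10u);
  assert(s.base_adjustment == 0u && s.remaining_offset == 0x10u);
  return 0;
}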
+ // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + Register index_reg = index.AsRegister<Register>(); + Register ref_reg = ref.AsRegister<Register>(); + Register data_reg = temp.AsRegister<Register>(); + DCHECK_NE(data_reg, kBakerCcEntrypointRegister); + + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg); + Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + __ AddConstant(data_reg, obj, data_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + EmitPlaceholderBne(this, bne_label); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); + __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8031,9 +8696,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8044,6 +8707,73 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. 
+ // } + + Register temp_reg = temp.AsRegister<Register>(); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8056,52 +8786,30 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // HeapReference<mirror::Object> ref = *src; // Original reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } Register temp_reg = temp.AsRegister<Register>(); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + + // field_offset` when the GC is marking. The entrypoint will already + // be loaded in `temp3`. Location temp3 = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only - // supports address of the form `obj + field_offset`, where `obj` - // is a register and `field_offset` is a register pair (of which - // only the lower half is used). Thus `offset` and `scale_factor` - // above are expected to be null in this code path. 
- DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8113,8 +8821,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); } @@ -8245,7 +8953,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr // save one load. However, since this is just an intrinsic slow path we prefer this // simple and more robust approach rather that trying to determine if that's the case. SlowPathCode* slow_path = GetCurrentSlowPath(); - if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); return temp; @@ -8253,8 +8962,7 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr return location.AsRegister<Register>(); } -Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
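A rough C++ analogue of the field update performed by the new UpdateReferenceFieldWithBakerReadBarrier slow path above (used for the UnsafeCASObject intrinsic), assuming the field can be treated as an atomic reference; the names and signature below are invented for illustration and the real code addresses the field as obj + field_offset through the per-register mark entrypoint.

#include <atomic>

using Obj = void;
using MarkFn = Obj* (*)(Obj*);

// Models the slow path: when the GC is marking and the holder is gray, the
// field is re-read, the referenced object is marked (possibly forwarded),
// and the field is updated with a CAS so a concurrent write is not clobbered.
inline void UpdateFieldWithBakerBarrier(std::atomic<Obj*>* field,
                                        MarkFn mark_entrypoint,
                                        bool gc_is_marking,
                                        bool holder_is_gray) {
  if (!gc_is_marking) {
    return;  // Fast path: nothing to do, the field is already up to date.
  }
  if (holder_is_gray) {
    Obj* old_ref = field->load(std::memory_order_relaxed);  // Original reference load.
    Obj* new_ref = mark_entrypoint(old_ref);                // ReadBarrier::Mark(ref).
    // compareAndSwapObject(obj, field_offset, old_ref, new_ref).
    field->compare_exchange_strong(old_ref, new_ref);
  }
}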
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -8267,6 +8975,18 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + Register temp_reg = temp.AsRegister<Register>(); + PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(temp_reg, temp_reg, ShifterOperand(PC)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; @@ -8303,11 +9023,6 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO break; } } - return callee_method; -} - -void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -8359,9 +9074,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp __ blx(LR); } -CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch( @@ -8374,6 +9091,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewTypeBssEntryPatch( return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -8385,18 +9107,9 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch( return &patches->back(); } -Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index) { - return boot_image_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - -Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, - dex::TypeIndex type_index) { - return boot_image_type_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) { + 
baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; } Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) { @@ -8447,43 +9160,32 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(linker_patches->empty()); size_t size = /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + - boot_image_string_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + - boot_image_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - for (const auto& entry : boot_image_string_patches_) { - const StringReference& target_string = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = literal->GetLabel()->Position(); - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - target_string.dex_file, - target_string.string_index.index_)); - } - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); - for (const auto& entry : boot_image_type_patches_) { - const TypeReference& target_type = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = literal->GetLabel()->Position(); - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - target_type.dex_file, - target_type.type_index.index_)); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(), + info.custom_data)); } DCHECK_EQ(size, linker_patches->size()); } @@ -8494,13 +9196,6 @@ Literal* CodeGeneratorARM::DeduplicateUint32Literal(uint32_t value, Uint32ToLite [this, value]() { return __ NewLiteral<uint32_t>(value); }); } -Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method, - MethodToLiteralMap* map) { - return map->GetOrCreate( - target_method, - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, 
LocationSummary::kNoCall); @@ -8716,14 +9411,20 @@ static void PatchJitRootUse(uint8_t* code, void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 86f2f21df7..5f37d3bff1 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -24,8 +24,8 @@ #include "nodes.h" #include "string_reference.h" #include "parallel_move_resolver.h" +#include "type_reference.h" #include "utils/arm/assembler_thumb2.h" -#include "utils/type_reference.h" namespace art { namespace arm { @@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); - void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -456,7 +455,6 @@ class CodeGeneratorARM : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -482,15 +480,18 @@ class CodeGeneratorARM : public CodeGenerator { Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); - Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index); - Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); + + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. 
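A sketch of the link-time arithmetic behind the MOVW/MOVT/ADD-PC placeholders emitted for kBootImageLinkTimePcRelative above, assuming the Thumb2 convention that PC reads as the address of the ADD plus 4. The struct and function names are invented; the real fixups are performed by the Thumb2 relative patcher.

#include <cstdint>

struct MovwMovtImmediates {
  uint16_t movw_imm;  // Low half of the PC-relative offset.
  uint16_t movt_imm;  // High half of the PC-relative offset.
};

// Chooses the two 16-bit immediates so that, after ADD temp, temp, PC,
// `temp` holds the target address.
inline MovwMovtImmediates ComputePcRelativeImmediates(uint32_t target_address,
                                                      uint32_t add_pc_instruction_address) {
  uint32_t pc_value = add_pc_instruction_address + 4u;  // Assumed Thumb2 PC read value.
  uint32_t offset = target_address - pc_value;
  return { static_cast<uint16_t>(offset & 0xffffu),
           static_cast<uint16_t>(offset >> 16) };
}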
+ Label* NewBakerReadBarrierPatch(uint32_t custom_data); + Literal* DeduplicateBootImageAddressLiteral(uint32_t address); Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, @@ -503,6 +504,10 @@ class CodeGeneratorARM : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Maybe add the reserved entrypoint register as a temporary for field load. This temp + // is added only for AOT compilation if link-time generated thunks for fields are enabled. + void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -526,11 +531,6 @@ class CodeGeneratorARM : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). This operation - // requires an extra temporary register, which must be provided as a - // non-null pointer (`temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -538,9 +538,27 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field = false, - Register* temp2 = nullptr); + bool needs_null_check); + + // Generate code checking whether the the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, @@ -604,11 +622,18 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + // `temp` is an extra temporary register that is used for some conditions; + // callers may not specify it, in which case the method will use a scratch + // register instead. 
+ void GenerateConditionWithZero(IfCondition condition, + Register out, + Register in, + Register temp = kNoRegister); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; - using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; using StringToLiteralMap = ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>; @@ -616,8 +641,14 @@ class CodeGeneratorARM : public CodeGenerator { Literal*, TypeReferenceValueComparator>; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + Label label; + uint32_t custom_data; + }; + Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); @@ -638,16 +669,16 @@ class CodeGeneratorARM : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // Deduplication map for boot string literals for kBootImageLinkTimeAddress. - StringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; - // Deduplication map for boot type literals for kBootImageLinkTimeAddress. - TypeToLiteralMap boot_image_type_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index da146d72cd..f2b312362f 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -29,6 +29,7 @@ #include "linker/arm64/relative_patcher_arm64.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" +#include "lock_word.h" #include "offsets.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" @@ -91,6 +92,7 @@ constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; // Flags controlling the use of link-time generated thunks for Baker read barriers. 
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; // Some instructions have special requirements for a temporary, for example @@ -855,7 +857,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -1006,6 +1008,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); @@ -1436,13 +1450,10 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1502,7 +1513,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki if (kind == Location::kRegister) { scratch = LocationFrom(vixl_temps_.AcquireX()); } else { - DCHECK(kind == Location::kFpuRegister); + DCHECK_EQ(kind, Location::kFpuRegister); scratch = LocationFrom(codegen_->GetGraph()->HasSIMD() ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize) : vixl_temps_.AcquireD()); @@ -1730,9 +1741,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) { (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); } -// Allocate a scratch register from the VIXL pool, querying first into -// the floating-point register pool, and then the the core register -// pool. 
This is essentially a reimplementation of +// Allocate a scratch register from the VIXL pool, querying first +// the floating-point register pool, and then the core register +// pool. This is essentially a reimplementation of // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize // using a different allocation strategy. static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm, @@ -1880,7 +1891,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, // ask for a scratch register of any type (core or FP). // // Also, we start by asking for a FP scratch register first, as the - // demand of scratch core registers is higher. This is why we + // demand of scratch core registers is higher. This is why we // use AcquireFPOrCoreCPURegisterOfSize instead of // UseScratchRegisterScope::AcquireCPURegisterOfSize, which // allocates core scratch registers first. @@ -2648,6 +2659,38 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddres Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + + HIntConstant* shift = instruction->GetShift()->AsIntConstant(); + + locations->SetInAt(0, Location::RequiresRegister()); + // For byte case we don't need to shift the index variable so we can encode the data offset into + // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist + // data offset constant generation out of the loop and reduce the critical path length in the + // loop. + locations->SetInAt(1, shift->GetValue() == 0 + ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) + : Location::RequiresRegister()); + locations->SetInAt(2, Location::ConstantLocation(shift)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + Register index_reg = InputRegisterAt(instruction, 0); + uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); + uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); + + if (shift == 0) { + __ Add(OutputRegister(instruction), index_reg, offset); + } else { + Register offset_reg = InputRegisterAt(instruction, 1); + __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift)); + } +} + void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); @@ -2764,6 +2807,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. 
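In scalar terms, the value produced by the new HIntermediateAddressIndex node handled above can be modelled as follows (illustrative helper, not ART code): for byte-sized elements (shift == 0) the data offset is folded into the ADD immediate, otherwise the scaled index is added to a data offset held in a register, which hoists the constant out of the loop.

#include <cstdint>

inline uint64_t IntermediateAddressIndex(uint64_t index, uint64_t data_offset, uint32_t shift) {
  // out = data_offset + (index << shift); a later memory operation adds the
  // array base to form the final address.
  return data_offset + (index << shift);
}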
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); @@ -2774,12 +2818,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { obj.W(), offset, maybe_temp, - /* needs_null_check */ true, + /* needs_null_check */ false, /* use_load_acquire */ false); } else { Register temp = WRegisterFrom(locations->GetTemp(0)); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); } } else { // General case. @@ -4446,8 +4490,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic return desired_dispatch_info; } -Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { @@ -4461,6 +4504,17 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + // Add ADRP with its PC-relative method patch. + vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add ADD with its PC-relative method patch. + vixl::aarch64::Label* add_label = + NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); + EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); @@ -4501,12 +4555,6 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati break; } } - return callee_method; -} - -void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - // All registers are assumed to be correctly set up. 
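For the ARM64 kBootImageLinkTimePcRelative case above, the ADRP and ADD placeholders resolve to a page-relative address. A sketch of that computation with invented names; the real patching is done by the ARM64 relative patcher at link time.

#include <cstdint>

// ADRP clears the low 12 bits of its own PC and adds a signed, page-scaled
// offset; the following ADD supplies the low 12 bits of the target.
inline uint64_t AdrpAddTarget(uint64_t adrp_pc, int64_t page_delta, uint32_t lo12) {
  uint64_t page_base = (adrp_pc & ~UINT64_C(0xfff)) + static_cast<uint64_t>(page_delta) * 4096u;
  return page_base + lo12;
}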
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -4584,12 +4632,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->GenerateInvokePolymorphicCall(invoke); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( - const DexFile& dex_file, - dex::StringIndex string_index, +vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( + MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return - NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + adrp_label, + &pc_relative_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( @@ -4606,6 +4655,14 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); } +vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label) { + return + NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset, @@ -4632,20 +4689,6 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( return label; } -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral( - const DexFile& dex_file, dex::StringIndex string_index) { - return boot_image_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral( - const DexFile& dex_file, dex::TypeIndex type_index) { - return boot_image_type_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); -} - vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( uint64_t address) { return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); @@ -4712,11 +4755,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - boot_image_string_patches_.size() + - pc_relative_string_patches_.size() + - boot_image_type_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { @@ -4725,32 +4767,21 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc info.pc_insn_label->GetLocation(), info.offset_or_index)); } - for (const auto& entry : boot_image_string_patches_) { - const StringReference& target_string = entry.first; - vixl::aarch64::Literal<uint32_t>* literal = entry.second; - linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(), - target_string.dex_file, - target_string.string_index.index_)); - } - if 
(!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); - for (const auto& entry : boot_image_type_patches_) { - const TypeReference& target_type = entry.first; - vixl::aarch64::Literal<uint32_t>* literal = entry.second; - linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(), - target_type.dex_file, - target_type.type_index.index_)); - } for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), info.custom_data)); @@ -4771,14 +4802,6 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(u [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); } -vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral( - MethodReference target_method, - MethodToLiteralMap* map) { - return map->GetOrCreate( - target_method, - [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); }); -} - void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { // Explicit clinit checks triggered by static invokes must have been pruned by // art::PrepareForRegisterAllocation. @@ -4818,21 +4841,15 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -4840,7 +4857,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -4885,7 +4902,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { // move. 
void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -4914,11 +4931,6 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA read_barrier_option); break; } - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), - cls->GetTypeIndex())); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); // Add ADRP with its PC-relative type patch. @@ -4972,7 +4984,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -5020,21 +5032,15 @@ void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear A HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -5043,7 +5049,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); } else { @@ -5073,10 +5079,6 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD Location out_loc = load->GetLocations()->Out(); switch (load->GetLoadKind()) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), - load->GetStringIndex())); - return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { // Add ADRP with its PC-relative String patch. 
const DexFile& dex_file = load->GetDexFile(); @@ -5478,6 +5480,15 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5929,9 +5940,9 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( !Runtime::Current()->UseJitCompilation()) { // Note that we do not actually check the value of `GetIsGcMarking()` // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. // // We use link-time generated thunks for the slow path. That thunk // checks the reference and jumps to the entrypoint if needed. @@ -6055,24 +6066,24 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins !use_load_acquire && !Runtime::Current()->UseJitCompilation()) { // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. // // temp = Thread::Current()->pReadBarrierMarkIntrospection - // lr = &return_address; + // lr = &gray_return_address; // if (temp != nullptr) { // goto field_thunk<holder_reg, base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> root = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); @@ -6142,16 +6153,74 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. 
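The new HConstructorFence visitor above lowers to a store-store barrier. As a loose C++ analogue of the publication guarantee this provides, a release fence before publishing a freshly initialized object keeps the payload stores ordered before the publishing store; readers still need an acquire (or dependency-ordered) load, and the names below are invented.

#include <atomic>

struct Box {
  int payload;
};

std::atomic<Box*> g_published{nullptr};

// Writer: initialize the object, fence, then publish the pointer. The release
// fence plays the role of the kStoreStore barrier emitted for the constructor
// fence: the payload store cannot be reordered after the publishing store.
void Publish(Box* b) {
  b->payload = 42;
  std::atomic_thread_fence(std::memory_order_release);  // ~ constructor fence.
  g_published.store(b, std::memory_order_relaxed);
}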
Instead, we + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + Register index_reg = RegisterFrom(index, Primitive::kPrimInt); + Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = + linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); + + // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(ip1, MemOperand(tr, entry_point_offset)); + __ Add(temp.X(), obj.X(), Operand(data_offset)); + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + return; + } + // Array cells are never volatile variables, therefore array loads // never use Load-Acquire instructions on ARM64. const bool use_load_acquire = false; - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -6171,8 +6240,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field) { + bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -6209,41 +6277,18 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // entrypoint will already be loaded in `temp2`. 
Register temp2 = lr; Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path; - if (always_update_field) { - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register. Thus - // `offset` and `scale_factor` above are expected to be null in - // this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); /* "times 1" */ - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); AddSlowPath(slow_path); // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -6255,12 +6300,83 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ Cbnz(temp2, slow_path->GetEntryLabel()); - // Fast path: just load the reference. + // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); } +void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If we are emitting an array load, we should not be using a + // Load Acquire instruction. In other words: + // `instruction->IsArrayGet()` => `!use_load_acquire`. + DCHECK(!instruction->IsArrayGet() || !use_load_acquire); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. 
+ // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // } + + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp2`. + Register temp2 = lr; + Location temp2_loc = LocationFrom(temp2); + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ 0u /* "times 1" */, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Ldr(temp2, MemOperand(tr, entry_point_offset)); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ Cbnz(temp2, slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). + __ Bind(slow_path->GetExitLabel()); +} + void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, Location ref, Register obj, @@ -6436,14 +6552,20 @@ static void PatchJitRootUse(uint8_t* code, void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 332ab49153..747fc9f0b1 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -25,8 +25,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "string_reference.h" +#include "type_reference.h" #include "utils/arm64/assembler_arm64.h" -#include "utils/type_reference.h" // TODO(VIXL): Make VIXL compile with -Wshadow. 
#pragma GCC diagnostic push @@ -318,12 +318,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); - vixl::aarch64::MemOperand CreateVecMemRegisters( + vixl::aarch64::MemOperand VecAddress( HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load, // This function may acquire a scratch register. - vixl::aarch64::UseScratchRegisterScope* temps_scope); + vixl::aarch64::UseScratchRegisterScope* temps_scope, + size_t size, + bool is_string_char_at, + /*out*/ vixl::aarch64::Register* scratch); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -539,7 +540,6 @@ class CodeGeneratorARM64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -548,12 +548,11 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative string patch for an instruction and return the label + // Add a new PC-relative method patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, + vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); // Add a new PC-relative type patch for an instruction and return the label @@ -572,6 +571,14 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative string patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative dex cache array patch for an instruction and return // the label to be bound before the instruction. The instruction will be // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` @@ -585,11 +592,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // before the CBNZ instruction. 
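// Illustrative sketch, not from this patch: the arithmetic behind the paired
// ADRP/ADD patches declared above. ADRP materializes the target's 4 KiB page
// relative to the current page; the following ADD (or LDR) supplies the low
// 12 bits, which is why each use records both an ADRP label and an `adrp_label`
// for the second instruction. The addresses below are made up.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t pc = 0x0000007712345678u;      // Address of the ADRP instruction.
  const uint64_t target = 0x00000077fedcba98u;  // Address the patch will resolve to.
  const uint64_t kPageMask = ~uint64_t{0xfffu};

  // ADRP: Xd = page(pc) + signed page delta encoded in the instruction.
  int64_t page_delta = static_cast<int64_t>(target & kPageMask) -
                       static_cast<int64_t>(pc & kPageMask);
  uint64_t adrp_result = (pc & kPageMask) + static_cast<uint64_t>(page_delta);

  // ADD: append the low 12 bits of the target address.
  uint64_t full_address = adrp_result + (target & 0xfffu);
  assert(full_address == target);
  return 0;
}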
vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); - vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral( - const DexFile& dex_file, - dex::StringIndex string_index); - vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, - dex::TypeIndex type_index); vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, @@ -634,9 +636,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -645,8 +644,27 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field = false); + bool use_load_acquire); + + // Generate code checking whether the the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch64::Register obj, + Location field_offset, + vixl::aarch64::Register temp, + bool needs_null_check, + bool use_load_acquire); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, @@ -714,9 +732,6 @@ class CodeGeneratorARM64 : public CodeGenerator { private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; - using MethodToLiteralMap = ArenaSafeMap<MethodReference, - vixl::aarch64::Literal<uint64_t>*, - MethodReferenceComparator>; using StringToLiteralMap = ArenaSafeMap<StringReference, vixl::aarch64::Literal<uint32_t>*, StringReferenceValueComparator>; @@ -727,8 +742,6 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method, - MethodToLiteralMap* map); // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays // and boot image strings/types. The only difference is the interpretation of the @@ -780,16 +793,14 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint64ToLiteralMap uint64_literals_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // Deduplication map for boot string literals for kBootImageLinkTimeAddress. 
- StringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; - // Deduplication map for boot type literals for kBootImageLinkTimeAddress. - TypeToLiteralMap boot_image_type_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index c37cc52a2b..93cbc3b17c 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -16,6 +16,7 @@ #include "code_generator_arm_vixl.h" +#include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" @@ -24,6 +25,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics_arm_vixl.h" +#include "linker/arm/relative_patcher_thumb2.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -77,6 +79,20 @@ static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; static constexpr int kCurrentMethodStackOffset = 0; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle +// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. +// For the Baker read barrier implementation using link-generated thunks we need to split +// the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// The reserved entrypoint register for link-time generated thunks. +const vixl32::Register kBakerCcEntrypointRegister = r4; + #ifdef __ #error "ARM Codegen VIXL macro-assembler macro already defined." #endif @@ -88,6 +104,60 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Marker that code is yet to be, and must, be implemented. 
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " +static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps, + HInstruction* instruction) { + DCHECK(temps->IsAvailable(ip)); + temps->Exclude(ip); + DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); + DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { + ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); +} + +static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { + return rt.IsLow() && rn.IsLow() && offset < 32u; +} + +class EmitAdrCode { + public: + EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) + : assembler_(assembler), rd_(rd), label_(label) { + ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes); + adr_location_ = assembler->GetCursorOffset(); + assembler->adr(EncodingSize(Wide), rd, label); + } + + ~EmitAdrCode() { + DCHECK(label_->IsBound()); + // The ADR emitted by the assembler does not set the Thumb mode bit we need. + // TODO: Maybe extend VIXL to allow ADR for return address? + uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_); + // Expecting ADR encoding T3 with `(offset & 1) == 0`. + DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26. + DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23. + DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15. + DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`. + // Add the Thumb mode bit. + raw_adr[2] |= 0x01u; + } + + private: + ArmVIXLMacroAssembler* const assembler_; + vixl32::Register rd_; + vixl32::Label* const label_; + int32_t adr_location_; +}; + // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, // for each live D registers they treat two corresponding S registers as live ones. // @@ -851,7 +921,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -993,6 +1063,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. 
+ // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); // /* int32_t */ monitor = obj->monitor_ @@ -1693,6 +1775,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege } } +static int64_t AdjustConstantForCondition(int64_t value, + IfCondition* condition, + IfCondition* opposite) { + if (value == 1) { + if (*condition == kCondB) { + value = 0; + *condition = kCondEQ; + *opposite = kCondNE; + } else if (*condition == kCondAE) { + value = 0; + *condition = kCondNE; + *opposite = kCondEQ; + } + } else if (value == -1) { + if (*condition == kCondGT) { + value = 0; + *condition = kCondGE; + *opposite = kCondLT; + } else if (*condition == kCondLE) { + value = 0; + *condition = kCondLT; + *opposite = kCondGE; + } + } + + return value; +} + static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( HCondition* condition, bool invert, @@ -1715,7 +1825,37 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( const vixl32::Register left_high = HighRegisterFrom(left); const vixl32::Register left_low = LowRegisterFrom(left); - int64_t value = Int64ConstantFrom(right); + int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite); + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (cond) { + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + ret = std::make_pair(ne, eq); + FALLTHROUGH_INTENDED; + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + __ Orrs(temps.Acquire(), left_low, left_high); + return ret; + case kCondLT: + case kCondGE: + __ Cmp(left_high, 0); + return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); + // Trivially true or false. 
+ case kCondB: + ret = std::make_pair(ne, eq); + FALLTHROUGH_INTENDED; + case kCondAE: + __ Cmp(left_low, left_low); + return ret; + default: + break; + } + } switch (cond) { case kCondEQ: @@ -1760,8 +1900,6 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( FALLTHROUGH_INTENDED; case kCondGE: case kCondLT: { - UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); - __ Cmp(left_low, Low32Bits(value)); __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); @@ -1879,18 +2017,22 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); - const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - const int64_t value = Int64ConstantFrom(locations->InAt(1)); + IfCondition c = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + const int64_t value = + AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite); if (c < kCondLT || c > kCondGE) { // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { + // encoding can be used; 0 is always handled, no matter what registers are + // used by the first input. + if (value != 0 && + (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) { return false; } // TODO(VIXL): The rest of the checks are there to keep the backend in sync with @@ -1909,6 +2051,354 @@ static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) return true; } +static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); + + const vixl32::Register out = OutputRegister(cond); + const auto condition = GenerateTest(cond, false, codegen); + + __ Mov(LeaveFlags, out, 0); + + if (out.IsLow()) { + // We use the scope because of the IT block that follows. 
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(condition.first); + __ mov(condition.first, out, 1); + } else { + vixl32::Label done_label; + vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + + __ B(condition.second, final_label, /* far_target */ false); + __ Mov(out, 1); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } + } +} + +static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const vixl32::Register out = OutputRegister(cond); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + vixl32::Register temp; + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), + &condition, + &opposite); + Operand right_high = High32Bits(value); + Operand right_low = Low32Bits(value); + + // The output uses Location::kNoOutputOverlap. + if (out.Is(left_high)) { + std::swap(left_low, left_high); + std::swap(right_low, right_high); + } + + __ Sub(out, left_low, right_low); + temp = temps.Acquire(); + __ Sub(temp, left_high, right_high); + } else { + DCHECK(right.IsRegisterPair()); + temp = temps.Acquire(); + __ Sub(temp, left_high, HighRegisterFrom(right)); + __ Sub(out, left_low, LowRegisterFrom(right)); + } + + // Need to check after calling AdjustConstantForCondition(). + DCHECK(condition == kCondEQ || condition == kCondNE) << condition; + + if (condition == kCondNE && out.IsLow()) { + __ Orrs(out, out, temp); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + } else { + __ Orr(out, out, temp); + codegen->GenerateConditionWithZero(condition, out, out, temp); + } +} + +static void GenerateLongComparesAndJumps(HCondition* cond, + vixl32::Label* true_label, + vixl32::Label* false_label, + CodeGeneratorARMVIXL* codegen, + bool is_far_target = true) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. 
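// Illustrative sketch, not from this patch: the two integer identities used by
// AdjustConstantForCondition() and GenerateEqualLong() above, checked in plain
// C++. Comparisons against 1/-1 are rewritten as comparisons against 0, and
// 64-bit (in)equality reduces to OR-ing the differences of the 32-bit halves.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t samples[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t x : samples) {
    assert((x < 1u) == (x == 0u));   // kCondB  against 1  -> kCondEQ against 0.
    assert((x >= 1u) == (x != 0u));  // kCondAE against 1  -> kCondNE against 0.
    int32_t s = static_cast<int32_t>(x);
    assert((s > -1) == (s >= 0));    // kCondGT against -1 -> kCondGE against 0.
    assert((s <= -1) == (s < 0));    // kCondLE against -1 -> kCondLT against 0.
  }

  // GenerateEqualLong: (left_low - right_low) | (left_high - right_high) is zero
  // iff the two 64-bit values are equal, so a single ORR(S) decides EQ/NE.
  auto equal_long = [](uint64_t a, uint64_t b) {
    uint32_t lo = static_cast<uint32_t>(a) - static_cast<uint32_t>(b);
    uint32_t hi = static_cast<uint32_t>(a >> 32) - static_cast<uint32_t>(b >> 32);
    return (lo | hi) == 0u;
  };
  assert(equal_long(0x123456789abcdef0u, 0x123456789abcdef0u));
  assert(!equal_long(0x123456789abcdef0u, 0x123456789abcdef1u));
  assert(!equal_long(0x023456789abcdef0u, 0x123456789abcdef0u));
  return 0;
}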
+ break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = Int64ConstantFrom(right); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ Cmp(left_high, val_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label, is_far_target); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label, is_far_target); + } else { + __ B(ARMCondition(true_high_cond), true_label, is_far_target); + __ B(ARMCondition(false_high_cond), false_label, is_far_target); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, val_low); + } else { + vixl32::Register right_high = HighRegisterFrom(right); + vixl32::Register right_low = LowRegisterFrom(right); + + __ Cmp(left_high, right_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label, is_far_target); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label, is_far_target); + } else { + __ B(ARMCondition(true_high_cond), true_label, is_far_target); + __ B(ARMCondition(false_high_cond), false_label, is_far_target); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, right_low); + } + // The last comparison might be unsigned. + // TODO: optimize cases where this is always true/false + __ B(final_condition, true_label, is_far_target); +} + +static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const vixl32::Register out = OutputRegister(cond); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + // Comparisons against 0 are common enough to deserve special attention. + if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (out.IsLow()) { + // We only care if both input registers are 0 or not. + __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left)); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + // We only care if both input registers are 0 or not. + __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left)); + codegen->GenerateConditionWithZero(condition, out, out); + return; + case kCondLT: + case kCondGE: + // We only care about the sign bit. 
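// Illustrative sketch, not from this patch: the decomposition used by
// GenerateLongComparesAndJumps() above. A 64-bit comparison on a 32-bit core
// first compares the high words (signed or unsigned as the condition requires)
// and only compares the low words, always unsigned, when the high words match.
#include <cassert>
#include <cstdint>

static bool SignedLessThan64(int64_t a, int64_t b) {
  int32_t ah = static_cast<int32_t>(a >> 32);
  int32_t bh = static_cast<int32_t>(b >> 32);
  uint32_t al = static_cast<uint32_t>(a);
  uint32_t bl = static_cast<uint32_t>(b);
  return (ah < bh) || (ah == bh && al < bl);  // Low-word compare is unsigned.
}

int main() {
  const int64_t samples[] = {0, 1, -1, 0x100000000LL, -0x100000000LL,
                             0x123456789LL, INT64_MAX, INT64_MIN};
  for (int64_t a : samples) {
    for (int64_t b : samples) {
      assert(SignedLessThan64(a, b) == (a < b));
    }
  }
  return 0;
}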
+ FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left)); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. + (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) { + GenerateEqualLong(cond, codegen); + return; + } + + if (CanGenerateTest(cond, codegen->GetAssembler())) { + GenerateConditionGeneric(cond, codegen); + return; + } + + // Convert the jumps into the result. + vixl32::Label done_label; + vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + vixl32::Label true_label, false_label; + + GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false); + + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ Mov(out, 1); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } +} + +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + const Primitive::Type type = cond->GetLeft()->GetType(); + + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + + if (type == Primitive::kPrimLong) { + GenerateConditionLong(cond, codegen); + return; + } + + IfCondition condition = cond->GetCondition(); + vixl32::Register in = InputRegisterAt(cond, 0); + const vixl32::Register out = OutputRegister(cond); + const Location right = cond->GetLocations()->InAt(1); + int64_t value; + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (out.IsLow() && out.Is(in)) { + __ Cmp(out, 0); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, in); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + Operand operand(0); + + if (right.IsConstant()) { + operand = Operand::From(value); + } else if (out.Is(RegisterFrom(right))) { + // Avoid 32-bit instructions if possible. + operand = InputOperandAt(cond, 0); + in = RegisterFrom(right); + } else { + operand = InputOperandAt(cond, 1); + } + + if (condition == kCondNE && out.IsLow()) { + __ Subs(out, in, operand); + + // We use the scope because of the IT block that follows. 
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + } else { + __ Sub(out, in, operand); + codegen->GenerateConditionWithZero(condition, out, out); + } + + return; + } + + GenerateConditionGeneric(cond, codegen); +} + static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2011,13 +2501,11 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2468,92 +2956,10 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. 
- break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = Int64ConstantFrom(right); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ Cmp(left_high, val_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, val_low); - } else { - vixl32::Register right_high = HighRegisterFrom(right); - vixl32::Register right_low = LowRegisterFrom(right); - - __ Cmp(left_high, right_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, right_low); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ B(final_condition, true_label); -} - void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target_in, - vixl32::Label* false_target_in) { + vixl32::Label* false_target_in, + bool is_far_target) { if (CanGenerateTest(condition, codegen_->GetAssembler())) { vixl32::Label* non_fallthrough_target; bool invert; @@ -2575,7 +2981,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c const auto cond = GenerateTest(condition, invert, codegen_); - __ B(cond.first, non_fallthrough_target); + __ B(cond.first, non_fallthrough_target, is_far_target); if (emit_both_branches) { // No target falls through, we need to branch. @@ -2592,7 +2998,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target); + GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target); if (false_target != &fallthrough) { __ B(false_target); @@ -2660,7 +3066,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru // the HCondition, generate the comparison directly. 
Primitive::Type type = condition->InputAt(0)->GetType(); if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - GenerateCompareTestAndBranch(condition, true_target, false_target); + GenerateCompareTestAndBranch(condition, true_target, false_target, far_target); return; } @@ -2679,14 +3085,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) { if (arm_cond.Is(eq)) { - __ CompareAndBranchIfZero(left, non_fallthrough_target); + __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target); } else { DCHECK(arm_cond.Is(ne)); - __ CompareAndBranchIfNonZero(left, non_fallthrough_target); + __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target); } } else { __ Cmp(left, right); - __ B(arm_cond, non_fallthrough_target); + __ B(arm_cond, non_fallthrough_target, far_target); } } @@ -2903,6 +3309,83 @@ void CodeGeneratorARMVIXL::GenerateNop() { __ Nop(); } +// `temp` is an extra temporary register that is used for some conditions; +// callers may not specify it, in which case the method will use a scratch +// register instead. +void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, + vixl32::Register out, + vixl32::Register in, + vixl32::Register temp) { + switch (condition) { + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) { + temp = out; + } + + // Avoid 32-bit instructions if possible; note that `in` and `temp` must be + // different as well. + if (in.IsLow() && temp.IsLow() && !in.Is(temp)) { + // temp = - in; only 0 sets the carry flag. + __ Rsbs(temp, in, 0); + + if (out.Is(in)) { + std::swap(in, temp); + } + + // out = - in + in + carry = carry + __ Adc(out, temp, in); + } else { + // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, in); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); + } + + break; + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: { + UseScratchRegisterScope temps(GetVIXLAssembler()); + + if (out.Is(in)) { + if (!temp.IsValid() || in.Is(temp)) { + temp = temps.Acquire(); + } + } else if (!temp.IsValid() || !temp.IsLow()) { + temp = out; + } + + // temp = in - 1; only 0 does not set the carry flag. + __ Subs(temp, in, 1); + // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry + __ Sbc(out, in, temp); + break; + } + case kCondGE: + __ Mvn(out, in); + in = out; + FALLTHROUGH_INTENDED; + case kCondLT: + // We only care about the sign bit. + __ Lsr(out, in, 31); + break; + case kCondAE: + // Trivially true. + __ Mov(out, 1); + break; + case kCondB: + // Trivially false. 
+ __ Mov(out, 0); + break; + default: + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -2939,52 +3422,47 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - const vixl32::Register out = OutputRegister(cond); + const Primitive::Type type = cond->GetLeft()->GetType(); - if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) { - const auto condition = GenerateTest(cond, false, codegen_); - // We use the scope because of the IT block that follows. - ExactAssemblyScope guard(GetVIXLAssembler(), - 4 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - - __ it(condition.first); - __ mov(condition.first, out, 1); - __ it(condition.second); - __ mov(condition.second, out, 0); + if (Primitive::IsFloatingPointType(type)) { + GenerateConditionGeneric(cond, codegen_); return; } - // Convert the jumps into the result. - vixl32::Label done_label; - vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { - vixl32::Label true_label, false_label; + const IfCondition condition = cond->GetCondition(); - GenerateLongComparesAndJumps(cond, &true_label, &false_label); + // A condition with only one boolean input, or two boolean inputs without being equality or + // inequality results from transformations done by the instruction simplifier, and is handled + // as a regular condition with integral inputs. + if (type == Primitive::kPrimBoolean && + cond->GetRight()->GetType() == Primitive::kPrimBoolean && + (condition == kCondEQ || condition == kCondNE)) { + vixl32::Register left = InputRegisterAt(cond, 0); + const vixl32::Register out = OutputRegister(cond); + const Location right_loc = cond->GetLocations()->InAt(1); - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); + // The constant case is handled by the instruction simplifier. + DCHECK(!right_loc.IsConstant()); - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); - } else { - DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + vixl32::Register right = RegisterFrom(right_loc); - const auto condition = GenerateTest(cond, false, codegen_); + // Avoid 32-bit instructions if possible. + if (out.Is(right)) { + std::swap(left, right); + } - __ Mov(LeaveFlags, out, 0); - __ B(condition.second, final_label, /* far_target */ false); - __ Mov(out, 1); - } + __ Eor(out, left, right); - if (done_label.IsReferenced()) { - __ Bind(&done_label); + if (condition == kCondEQ) { + __ Eor(out, out, 1); + } + + return; } + + GenerateConditionIntegralOrNonPrimitive(cond, codegen_); } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -3119,6 +3597,15 @@ void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( // Will be generated at use site. 
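// Illustrative sketch, not from this patch: the branchless zero tests that
// GenerateConditionWithZero() above relies on, emulated in plain C++. The ARM
// carry flag (carry == no borrow for a subtraction) is modelled explicitly and
// Clz32 stands in for the CLZ instruction.
#include <cassert>
#include <cstdint>

static uint32_t Clz32(uint32_t x) {
  uint32_t n = 0;
  while (n < 32u && (x & (0x80000000u >> n)) == 0u) ++n;
  return n;
}

int main() {
  const uint32_t samples[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t in : samples) {
    // kCondEQ, CLZ variant: only 0 has 32 leading zeros, so CLZ >> 5 is (in == 0).
    assert((Clz32(in) >> 5) == (in == 0u ? 1u : 0u));

    // kCondEQ, RSBS/ADC variant: the carry out of 0 - in is set only for in == 0,
    // and (-in) + in + carry == carry.
    uint32_t carry_eq = (0u >= in) ? 1u : 0u;
    assert((0u - in) + in + carry_eq == (in == 0u ? 1u : 0u));

    // kCondNE, SUBS/SBC variant: the carry out of in - 1 is set unless in == 0,
    // and in - (in - 1) - (1 - carry) == carry.
    uint32_t carry_ne = (in >= 1u) ? 1u : 0u;
    assert(in - (in - 1u) - (1u - carry_ne) == (in != 0u ? 1u : 0u));

    // kCondLT: for a signed input only the sign bit matters, hence LSR #31.
    assert((in >> 31) == (static_cast<int32_t>(in) < 0 ? 1u : 0u));
  }
  return 0;
}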
} +void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5296,7 +5783,18 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5763,11 +6261,35 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. 
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Also need a temporary for String compression feature. locations->AddTemp(Location::RequiresRegister()); } } @@ -5878,8 +6400,20 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type); + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + locations->GetTemp(0), + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + } } else { vixl32::Register out = OutputRegister(instruction); if (index.IsConstant()) { @@ -6315,6 +6849,16 @@ void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddr } } +void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -6707,21 +7251,15 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -6729,7 +7267,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConventionARMVIXL calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -6769,13 +7307,20 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { // For non-Baker read barrier we have a temp-clobbering call. 
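// Illustrative sketch, not from this patch: the constant-index case above folds
// the index into the load offset and only reserves an extra temp when that
// offset reaches kReferenceLoadMinFarOffset (4 KiB). The array data offset used
// here (12 bytes) and the 4-byte heap reference size are assumptions for the
// example, not values taken from the patch.
#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint32_t kReferenceLoadMinFarOffset = 4u * 1024u;  // Defined by this patch.
  constexpr uint32_t kAssumedArrayDataOffset = 12u;            // Hypothetical header size.
  constexpr uint32_t kAssumedComponentSizeShift = 2u;          // Assuming 4-byte references.

  const uint32_t indices[] = {0u, 100u, 1020u, 1021u, 4096u};
  for (uint32_t index : indices) {
    uint32_t offset = kAssumedArrayDataOffset + (index << kAssumedComponentSizeShift);
    bool needs_extra_temp = offset >= kReferenceLoadMinFarOffset;
    std::printf("index %4u -> offset %6u, extra temp needed: %s\n",
                static_cast<unsigned>(index), static_cast<unsigned>(offset),
                needs_extra_temp ? "yes" : "no");
  }
  return 0;
}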
} } + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + (load_kind == HLoadClass::LoadKind::kReferrersClass && + !Runtime::Current()->UseJitCompilation())) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not // move. void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -6802,13 +7347,6 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ read_barrier_option); break; } - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), - cls->GetTypeIndex())); - break; - } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -6844,7 +7382,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -6905,21 +7443,15 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -6929,7 +7461,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(LocationFrom(r0)); } else { locations->SetOut(Location::RequiresRegister()); @@ -6945,6 +7477,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() // that the the kPrimNot result register is the same as the first argument register. 
locations->SetCustomSlowPathCallerSaves(caller_saves); + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -6961,11 +7496,6 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE HLoadString::LoadKind load_kind = load->GetLoadKind(); switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: { - __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), - load->GetStringIndex())); - return; // No dex cache slow path. - } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = @@ -7009,7 +7539,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE } // TODO: Re-add the compiler code to do string dex cache lookup again. - DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConventionARMVIXL calling_convention; __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); @@ -7107,6 +7637,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); + } } void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -8005,48 +8538,98 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. 
+ // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( + root_reg.GetCode(), narrow); + vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard(GetVIXLAssembler(), + 4 * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. 
+ Location temp = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -8064,6 +8647,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } +} + void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, @@ -8073,6 +8666,85 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. 
That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); + vixl32::Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = RegisterFrom(temp); + DCHECK(!base.Is(kBakerCcEntrypointRegister)); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); + } + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base.GetCode(), obj.GetCode(), narrow); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Note: We need a specific width for the unpoisoning NEG. + if (kPoisonHeapReferences) { + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? 
BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -8093,9 +8765,73 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer. + DCHECK(!data_reg.Is(kBakerCcEntrypointRegister)); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + __ Add(data_reg, obj, Operand(data_offset)); + + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. 
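One detail of the field path above that is easy to miss: when `offset` is too large for the LDR immediate the thunk expects, it is split against the power-of-two `kReferenceLoadMinFarOffset` so that the aligned high part goes into an up-front ADD and only the small remainder stays in the LDR. A sketch of that arithmetic; the constant's actual value is not assumed here, only the power-of-two property that the static_assert above already checks:

#include <cassert>
#include <cstdint>

// Mirrors `Add(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u))` followed by
// `offset &= (kReferenceLoadMinFarOffset - 1u)` in the field load above.
struct SplitOffset {
  uint32_t base_adjustment;  // Folded into the base register by the ADD.
  uint32_t small_offset;     // Left in the LDR immediate; always < min_far_offset.
};

SplitOffset SplitFarOffset(uint32_t offset, uint32_t min_far_offset) {
  assert(min_far_offset != 0u && (min_far_offset & (min_far_offset - 1u)) == 0u);  // Power of two.
  SplitOffset split;
  split.base_adjustment = offset & ~(min_far_offset - 1u);
  split.small_offset = offset & (min_far_offset - 1u);
  assert(split.base_adjustment + split.small_offset == offset);  // (obj + adj) + small == obj + offset.
  return split;
}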
+ if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8107,9 +8843,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - vixl32::Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8120,6 +8854,73 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } + + vixl32::Register temp_reg = RegisterFrom(temp); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. 
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl32::Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8129,55 +8930,32 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // // Slow path. // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } vixl32::Register temp_reg = RegisterFrom(temp); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp3`. Location temp3 = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register pair (of - // which only the lower half is used). Thus `offset` and - // `scale_factor` above are expected to be null in this code path. 
- DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8189,8 +8967,8 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); } @@ -8348,7 +9126,7 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( return RegisterFrom(location); } -Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( +void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( HInvokeStaticOrDirect* invoke, Location temp) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
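The UpdateReferenceFieldWithBakerReadBarrier hunk above is the UnsafeCASObject flavour of the barrier: when the GC is marking, the slow path marks the field's current value and installs the marked reference with a compare-and-swap, so there is nothing left to do on the fast path. A rough C++ model of that protocol; it uses std::atomic and a callback in place of the runtime entrypoint, and it leaves out the lock-word gray check and the load fence that the real slow path performs:

#include <atomic>
#include <cstdint>

using ObjRef = uint32_t;            // Compressed heap reference (illustrative).
using MarkFn = ObjRef (*)(ObjRef);  // Hypothetical ReadBarrier::Mark-style callback.

// Models only the control flow from the pseudo-code above; the real slow path also
// checks the holder's read barrier state (gray bit) before marking.
void MaybeUpdateField(std::atomic<ObjRef>* field, MarkFn mark_entrypoint) {
  if (mark_entrypoint == nullptr) {  // Null entrypoint <=> GC not marking: field is up to date.
    return;
  }
  ObjRef old_ref = field->load(std::memory_order_relaxed);  // ref = *(obj + field_offset).
  ObjRef new_ref = mark_entrypoint(old_ref);                // ref = ReadBarrier::Mark(ref).
  if (new_ref != old_ref) {
    // compareAndSwapObject(obj, field_offset, old_ref, new_ref): only install the marked
    // reference if no other thread updated the field in the meantime.
    field->compare_exchange_strong(old_ref, new_ref, std::memory_order_relaxed);
  }
}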
switch (invoke->GetMethodLoadKind()) { @@ -8362,6 +9140,13 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; @@ -8399,12 +9184,6 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( break; } } - return callee_method; -} - -void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -8478,9 +9257,11 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location __ blx(lr); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( @@ -8493,6 +9274,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntry return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -8504,34 +9290,15 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } -VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral( - const DexFile& dex_file, - dex::StringIndex string_index) { - return boot_image_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); - }); -} - -VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageTypeLiteral( - const DexFile& dex_file, - dex::TypeIndex type_index) { - return boot_image_type_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); - }); +vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; } VIXLUInt32Literal* 
CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) { return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); } -VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) { - return DeduplicateUint32Literal(address, &uint32_literals_); -} - VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( const DexFile& dex_file, dex::StringIndex string_index, @@ -8581,43 +9348,32 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa DCHECK(linker_patches->empty()); size_t size = /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + - boot_image_string_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + - boot_image_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - for (const auto& entry : boot_image_string_patches_) { - const StringReference& target_string = entry.first; - VIXLUInt32Literal* literal = entry.second; - DCHECK(literal->IsBound()); - uint32_t literal_offset = literal->GetLocation(); - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - target_string.dex_file, - target_string.string_index.index_)); - } - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); - for (const auto& entry : boot_image_type_patches_) { - const TypeReference& target_type = entry.first; - VIXLUInt32Literal* literal = entry.second; - DCHECK(literal->IsBound()); - uint32_t literal_offset = literal->GetLocation(); - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - target_type.dex_file, - target_type.type_index.index_)); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), + info.custom_data)); } DCHECK_EQ(size, linker_patches->size()); } @@ -8632,16 +9388,6 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( }); } -VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodLiteral( - MethodReference target_method, - MethodToLiteralMap* map) { - return map->GetOrCreate( - target_method, - [this]() { - return 
GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); - }); -} - void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); @@ -8855,14 +9601,20 @@ static void PatchJitRootUse(uint8_t* code, void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + VIXLUInt32Literal* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + VIXLUInt32Literal* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 1e9669dc38..f6e4de33a8 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -24,8 +24,8 @@ #include "nodes.h" #include "string_reference.h" #include "parallel_move_resolver.h" +#include "type_reference.h" #include "utils/arm/assembler_arm_vixl.h" -#include "utils/type_reference.h" // TODO(VIXL): make vixl clean wrt -Wshadow. 
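The reworked CodeGeneratorARMVIXL::EmitLinkerPatches above sizes its output exactly: each PC-relative entry expands into two patches (one for the MOVW, one for the MOVT), each Baker read barrier entry into one, and the total is reserved up front and re-checked at the end. A generic sketch of that reserve-then-fill-then-assert pattern; Patch and the counts are placeholders, not the real LinkerPatch API:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Placeholder patch record; the real code pushes LinkerPatch objects instead.
struct Patch {
  uint32_t literal_offset;
};

// Every PC-relative entry yields a MOVW patch and a MOVT patch, every Baker entry
// yields a single BNE patch, so the exact total is known before emission starts.
void EmitPatchesSketch(size_t num_pc_relative, size_t num_baker, std::vector<Patch>* out) {
  const size_t size = 2u * num_pc_relative + num_baker;
  out->reserve(size);
  for (size_t i = 0; i != num_pc_relative; ++i) {
    out->push_back(Patch{/* movw_label offset */ 0u});
    out->push_back(Patch{/* movt_label offset */ 0u});
  }
  for (size_t i = 0; i != num_baker; ++i) {
    out->push_back(Patch{/* bne label offset */ 0u});
  }
  assert(out->size() == size);  // Mirrors DCHECK_EQ(size, linker_patches->size()).
}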
#pragma GCC diagnostic push @@ -400,10 +400,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool far_target = true); void GenerateCompareTestAndBranch(HCondition* condition, vixl::aarch32::Label* true_target, - vixl::aarch32::Label* false_target); - void GenerateLongComparesAndJumps(HCondition* cond, - vixl::aarch32::Label* true_label, - vixl::aarch32::Label* false_label); + vixl::aarch32::Label* false_target, + bool is_far_target = true); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -540,7 +538,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -566,18 +563,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); - VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index); - VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, - dex::TypeIndex type_index); + + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. + vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address); - VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address); VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle); @@ -589,6 +587,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Maybe add the reserved entrypoint register as a temporary for field load. This temp + // is added only for AOT compilation if link-time generated thunks for fields are enabled. + void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -612,11 +614,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). This operation - // requires an extra temporary register, which must be provided as a - // non-null pointer (`temp2`). 
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch32::Register obj, @@ -624,9 +621,27 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field = false, - vixl::aarch32::Register* temp2 = nullptr); + bool needs_null_check); + + // Generate code checking whether the the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl::aarch32::Register temp2); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, @@ -699,13 +714,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, vixl::aarch32::Register out); + // `temp` is an extra temporary register that is used for some conditions; + // callers may not specify it, in which case the method will use a scratch + // register instead. + void GenerateConditionWithZero(IfCondition condition, + vixl::aarch32::Register out, + vixl::aarch32::Register in, + vixl::aarch32::Register temp = vixl32::Register()); + private: vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>; - using MethodToLiteralMap = - ArenaSafeMap<MethodReference, VIXLUInt32Literal*, MethodReferenceComparator>; using StringToLiteralMap = ArenaSafeMap<StringReference, VIXLUInt32Literal*, StringReferenceValueComparator>; @@ -713,9 +734,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { VIXLUInt32Literal*, TypeReferenceValueComparator>; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + vixl::aarch32::Label label; + uint32_t custom_data; + }; + VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method, - MethodToLiteralMap* map); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); @@ -740,16 +766,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // Deduplication map for boot string literals for kBootImageLinkTimeAddress. - StringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; - // Deduplication map for boot type literals for kBootImageLinkTimeAddress. 
- TypeToLiteralMap boot_image_type_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index b7e602420c..951d75a708 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -219,15 +219,33 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - + const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier); + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); + const bool is_load_class_bss_entry = + (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; + // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. + Register entry_address = kNoRegister; + if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); + // In the unlucky case that `temp` is A0, we preserve the address in `out` across the + // kSaveEverything call. + entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (temp_is_a0) { + __ Move(entry_address, temp); + } + } + dex::TypeIndex type_index = cls_->GetTypeIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType; mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); @@ -237,25 +255,27 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } + // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. + if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) { + // The class entry address was preserved in `entry_address` thanks to kSaveEverything. + __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0); + } + // Move the class to the desired location. 
- Location out = locations->Out(); if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = instruction_->GetType(); - mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + mips_codegen->MoveLocation(out, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + type); } - RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - DCHECK(out.IsValid()); - // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to - // kSaveEverything and use a temporary for the .bss entry address in the fast path, - // so that we can avoid another calculation here. - bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + + // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. + if (is_load_class_bss_entry && !r2_baker_or_no_read_barriers) { + // For non-Baker read barriers (or on R6), we need to re-calculate the address of + // the class entry. Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); - DCHECK_NE(out.AsRegister<Register>(), AT); CodeGeneratorMIPS::PcRelativePatchInfo* info = mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); bool reordering = __ SetReorder(false); @@ -286,40 +306,62 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + DCHECK(instruction_->IsLoadString()); + DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const dex::StringIndex string_index = load->GetStringIndex(); + Register out = locations->Out().AsRegister<Register>(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - + const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier); + InvokeRuntimeCallingConvention calling_convention; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; - HLoadString* load = instruction_->AsLoadString(); - const dex::StringIndex string_index = load->GetStringIndex(); + // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. + Register entry_address = kNoRegister; + if (r2_baker_or_no_read_barriers) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); + // In the unlucky case that `temp` is A0, we preserve the address in `out` across the + // kSaveEverything call. + entry_address = temp_is_a0 ? 
out : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (temp_is_a0) { + __ Move(entry_address, temp); + } + } + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + // Store the resolved string to the BSS entry. + if (r2_baker_or_no_read_barriers) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0); + } + Primitive::Type type = instruction_->GetType(); mips_codegen->MoveLocation(locations->Out(), - calling_convention.GetReturnLocation(type), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); - RestoreLiveRegisters(codegen, locations); - // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. - bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); - Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - DCHECK_NE(out, AT); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base); - __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - + // Store the resolved string to the BSS entry. + if (!r2_baker_or_no_read_barriers) { + // For non-Baker read barriers (or on R6), we need to re-calculate the address of + // the string entry. + Register base = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + bool reordering = __ SetReorder(false); + mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base); + __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); + __ SetReorder(reordering); + } __ B(GetExitLabel()); } @@ -1019,13 +1061,10 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { @@ -1564,50 +1603,36 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + - boot_image_string_patches_.size() + - boot_image_type_patches_.size(); + pc_relative_string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); - for (const auto& entry : boot_image_string_patches_) { - const StringReference& target_string = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - target_string.dex_file, - target_string.string_index.index_)); - } - for (const auto& entry : boot_image_type_patches_) { - const TypeReference& target_type = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t 
literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - target_type.dex_file, - target_type.type_index.index_)); - } DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( @@ -1620,6 +1645,11 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -1637,27 +1667,6 @@ Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLit [this, value]() { return __ NewLiteral<uint32_t>(value); }); } -Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method, - MethodToLiteralMap* map) { - return map->GetOrCreate( - target_method, - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - -Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index) { - return boot_image_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - -Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, - dex::TypeIndex type_index) { - return boot_image_type_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) { return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); } @@ -1665,6 +1674,7 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base) { + DCHECK_NE(out, base); if (GetInstructionSetFeatures().IsR6()) { DCHECK_EQ(base, ZERO); __ Bind(&info->high_label); @@ -1724,31 +1734,32 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, DCHECK_EQ(code[literal_offset + 1], 0x12); DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00); DCHECK_EQ(code[literal_offset + 3], 0x3C); - // lw reg, reg, addr32_low + // instr reg, reg, addr32_low DCHECK_EQ(code[literal_offset + 4], 0x78); DCHECK_EQ(code[literal_offset + 5], 0x56); - DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C); - addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "lw reg, reg, addr32_low". 
+ addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low". // lui reg, addr32_high code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); - // lw reg, reg, addr32_low + // instr reg, reg, addr32_low code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0); code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8); } void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const JitPatchInfo& info : jit_string_patches_) { - const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file, - dex::StringIndex(info.index))); + const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file, + dex::StringIndex(info.index))); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } for (const JitPatchInfo& info : jit_class_patches_) { - const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file, - dex::TypeIndex(info.index))); + const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file, + dex::TypeIndex(info.index))); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } } @@ -2441,6 +2452,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(type)) { @@ -3443,8 +3457,6 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - Register dst = locations->Out().AsRegister<Register>(); - MipsLabel true_label; switch (type) { default: @@ -3453,27 +3465,14 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { return; case Primitive::kPrimLong: - // TODO: don't use branches. - GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label); - break; + GenerateLongCompare(instruction->GetCondition(), locations); + return; case Primitive::kPrimFloat: case Primitive::kPrimDouble: GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); return; } - - // Convert the branches into the result. - MipsLabel done; - - // False case: result = 0. - __ LoadConst32(dst, 0); - __ B(&done); - - // True case: result = 1. 
- __ Bind(&true_label); - __ LoadConst32(dst, 1); - __ Bind(&done); } void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -4243,6 +4242,221 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, } } +void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond, + LocationSummary* locations) { + Register dst = locations->Out().AsRegister<Register>(); + Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Location rhs_location = locations->InAt(1); + Register rhs_high = ZERO; + Register rhs_low = ZERO; + int64_t imm = 0; + uint32_t imm_high = 0; + uint32_t imm_low = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + imm = rhs_location.GetConstant()->AsLongConstant()->GetValue(); + imm_high = High32Bits(imm); + imm_low = Low32Bits(imm); + } else { + rhs_high = rhs_location.AsRegisterPairHigh<Register>(); + rhs_low = rhs_location.AsRegisterPairLow<Register>(); + } + if (use_imm && imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Or(dst, lhs_high, lhs_low); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Or(dst, lhs_high, lhs_low); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + __ Slt(dst, lhs_high, ZERO); + break; + case kCondGE: + __ Slt(dst, lhs_high, ZERO); + __ Xori(dst, dst, 1); + break; + case kCondLE: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Sltu(dst, AT, TMP); + __ Xori(dst, dst, 1); + break; + case kCondGT: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Sltu(dst, AT, TMP); + break; + case kCondB: // always false + __ Andi(dst, dst, 0); + break; + case kCondAE: // always true + __ Ori(dst, ZERO, 1); + break; + } + } else if (use_imm) { + // TODO: more efficient comparison with constants without loading them into TMP/AT. 
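GenerateLongCompare materializes a 64-bit condition into a register without branches by combining a comparison of the high words (signed for the kCondLT/kCondGE/kCondGT/kCondLE cases, unsigned for kCondB/kCondA and friends) with an unsigned comparison of the low words. In C terms, the boolean the Slt/Sltu sequences compute for kCondLT is equivalent to the sketch below (kCondGE is then just the trailing Xori negation):

#include <cstdint>

// Branchless 64-bit signed less-than built from 32-bit halves: true when the high
// words already decide the order, or when they are equal and the unsigned low-word
// comparison decides it. Equivalent to the value left in `dst` for kCondLT.
int LongLessThan(int32_t lhs_high, uint32_t lhs_low, int32_t rhs_high, uint32_t rhs_low) {
  int high_decides = lhs_high < rhs_high;                         // Slt on the high words.
  int tie_break = (lhs_high == rhs_high) && (lhs_low < rhs_low);  // Sltu on the low words.
  return high_decides | tie_break;                                // Combined with Or.
}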
+ switch (cond) { + case kCondEQ: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(dst, TMP, AT); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(dst, TMP, AT); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + case kCondGE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, lhs_low, TMP); + } + __ LoadConst32(TMP, imm_high); + __ Slt(AT, lhs_high, TMP); + __ Slt(TMP, TMP, lhs_high); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, lhs_low, dst); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondGE) { + __ Xori(dst, dst, 1); + } + break; + case kCondGT: + case kCondLE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, TMP, lhs_low); + } + __ LoadConst32(TMP, imm_high); + __ Slt(AT, TMP, lhs_high); + __ Slt(TMP, lhs_high, TMP); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, dst, lhs_low); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondLE) { + __ Xori(dst, dst, 1); + } + break; + case kCondB: + case kCondAE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, lhs_low, TMP); + } + __ LoadConst32(TMP, imm_high); + __ Sltu(AT, lhs_high, TMP); + __ Sltu(TMP, TMP, lhs_high); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, lhs_low, dst); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondAE) { + __ Xori(dst, dst, 1); + } + break; + case kCondA: + case kCondBE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, TMP, lhs_low); + } + __ LoadConst32(TMP, imm_high); + __ Sltu(AT, TMP, lhs_high); + __ Sltu(TMP, lhs_high, TMP); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, dst, lhs_low); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondBE) { + __ Xori(dst, dst, 1); + } + break; + } + } else { + switch (cond) { + case kCondEQ: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(dst, TMP, AT); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(dst, TMP, AT); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + case kCondGE: + __ Slt(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Slt(TMP, TMP, AT); + __ Slt(AT, lhs_high, rhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondGE) { + __ Xori(dst, dst, 1); + } + break; + case kCondGT: + case kCondLE: + __ Slt(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Slt(TMP, TMP, AT); + __ Slt(AT, rhs_high, lhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondLE) { + __ Xori(dst, dst, 1); + } + break; + case kCondB: + case kCondAE: + __ Sltu(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Slt(TMP, TMP, AT); + __ Sltu(AT, lhs_high, rhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondAE) { + __ Xori(dst, dst, 1); + } + break; + case kCondA: + case kCondBE: + __ Sltu(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Slt(TMP, TMP, AT); + __ Sltu(AT, rhs_high, lhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondBE) { + __ Xori(dst, dst, 1); + } + break; + } + } +} + void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label) { @@ -5775,6 +5989,9 @@ void 
LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall)); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (generate_volatile) { InvokeRuntimeCallingConvention calling_convention; @@ -6453,6 +6670,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -6460,6 +6678,7 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -6469,6 +6688,9 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. @@ -6738,7 +6960,7 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6; + bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6; IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -6784,27 +7006,22 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kBootImageAddress: + break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: fallback_load = false; break; } if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; } return desired_string_load_kind; } @@ -6823,27 +7040,22 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: fallback_load = false; break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - 
DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kBootImageAddress: + break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: fallback_load = false; break; } if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; } return desired_class_load_kind; } @@ -6885,6 +7097,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; switch (dispatch_info.method_load_kind) { + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: break; default: @@ -6904,7 +7117,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); bool is_r6 = GetInstructionSetFeatures().IsR6(); - Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6) + Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6) ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) : ZERO; @@ -6922,6 +7135,16 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + bool reordering = __ SetReorder(false); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg); + __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678); + __ SetReorder(reordering); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; @@ -7054,28 +7277,28 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(Primitive::kPrimNot)); + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); return; } DCHECK(!cls->NeedsAccessCheck()); - + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kBssEntry: - if (codegen_->GetInstructionSetFeatures().IsR6()) { + if (isR6) { break; } FALLTHROUGH_INTENDED; @@ -7086,13 +7309,29 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { break; } locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + // Request a temp to hold the BSS entry location for the slow path on R2 + // (no benefit for R6). + if (!isR6) { + locations->AddTemp(Location::RequiresRegister()); + } + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not // move. void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -7105,14 +7344,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kBssEntry: base_or_current_method_reg = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); break; case HLoadClass::LoadKind::kReferrersClass: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); break; default: @@ -7136,14 +7374,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF read_barrier_option); break; } - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), - cls->GetTypeIndex())); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -7168,10 +7398,22 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - bool reordering = __ SetReorder(false); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); - __ SetReorder(reordering); + constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (isR6 || non_baker_read_barrier) { + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); + __ SetReorder(reordering); + } else { + // On R2 save the BSS entry address in a temporary register instead of + // recalculating it in the slow path. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg); + __ Addiu(temp, temp, /* placeholder */ 0x5678); + __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option); + } generate_null_check = true; break; } @@ -7186,7 +7428,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF __ SetReorder(reordering); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -7235,28 +7477,44 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBssEntry: - if (codegen_->GetInstructionSetFeatures().IsR6()) { + if (isR6) { break; } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. 
- case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: locations->SetInAt(0, Location::RequiresRegister()); break; default: break; } - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + // Request a temp to hold the BSS entry location for the slow path on R2 + // (no benefit for R6). + if (!isR6) { + locations->AddTemp(Location::RequiresRegister()); + } + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } } } @@ -7271,7 +7529,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBssEntry: @@ -7283,13 +7540,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), - load->GetStringIndex())); - return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = @@ -7313,14 +7563,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - bool reordering = __ SetReorder(false); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(load, - out_loc, - out, - /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (isR6 || non_baker_read_barrier) { + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); + __ SetReorder(reordering); + } else { + // On R2 save the BSS entry address in a temporary register instead of + // recalculating it in the slow path. 
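The kBssEntry locations above follow the same pattern for strings as for classes: with Baker (or no) read barriers the resolution entrypoint is relied upon to save everything, so only the calling-convention argument register is declared caller-save, and on R2 an extra temp is requested so the fast path's computed .bss slot address can be reused by the slow path instead of being recalculated. A self-contained sketch of that fast-path/slow-path shape in plain C++ (bss_string_table, LoadStringWithBssEntry and ResolveStringFromRuntime are made-up names standing in for the emitted code and the runtime call):

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    // Hypothetical stand-in for the .bss string entries (zero-initialized, like .bss).
    static const std::string* bss_string_table[64] = {};

    const std::string* ResolveStringFromRuntime(uint32_t string_index);  // models kQuickResolveString

    const std::string* LoadStringWithBssEntry(uint32_t string_index) {
      // Fast path: compute the slot address once and keep it around (the R2 temp).
      const std::string** slot = &bss_string_table[string_index];
      const std::string* root = *slot;   // GenerateGcRootFieldLoad
      if (root == nullptr) {             // Beqz(out, slow_path->GetEntryLabel())
        // Slow path: resolve, then store through the address computed above
        // instead of re-deriving it.
        root = ResolveStringFromRuntime(string_index);
        *slot = root;
      }
      return root;
    }

    const std::string* ResolveStringFromRuntime(uint32_t string_index) {
      static std::unordered_map<uint32_t, std::string> interned;
      return &interned.try_emplace(string_index, "str#" + std::to_string(string_index)).first->second;
    }

    int main() {
      const std::string* a = LoadStringWithBssEntry(3);  // slow path: resolves and caches
      const std::string* b = LoadStringWithBssEntry(3);  // fast path: hits the cached slot
      return (a == b) ? 0 : 1;
    }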
+ Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg); + __ Addiu(temp, temp, /* placeholder */ 0x5678); + __ SetReorder(reordering); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption); + } SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); __ Beqz(out, slow_path->GetEntryLabel()); @@ -7348,8 +7610,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } // TODO: Re-add the compiler code to do string dex cache lookup again. - DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -7774,6 +8037,15 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -8093,6 +8365,23 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); + + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int). + // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. if (result_type == Primitive::kPrimLong) { if (isR6) { // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1.
MIPS32R6 is implemented as a secondary @@ -8100,62 +8389,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - MipsLabel truncate; - MipsLabel done; - - // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive - // value when the input is either a NaN or is outside of the range of the output type - // after the truncation. IOW, the three special cases (NaN, too small, too big) produce - // the same result. - // - // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum - // value of the output type if the input is outside of the range after the truncation or - // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct - // results. This matches the desired float/double-to-int/long conversion exactly. - // - // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. - // - // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate - // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, - // even though it must be NAN2008=1 on R6. - // - // The code takes care of the different behaviors by first comparing the input to the - // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int). - // If the input is greater than or equal to the minimum, it procedes to the truncate - // instruction, which will handle such an input the same way irrespective of NAN2008. - // Otherwise the input is compared to itself to determine whether it is a NaN or not - // in order to return either zero or the minimum value. - // - // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the - // truncate instruction for MIPS64R6. - if (input_type == Primitive::kPrimFloat) { - uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()); - __ LoadConst32(TMP, min_val); - __ Mtc1(TMP, FTMP); - __ CmpLeS(FTMP, FTMP, src); - } else { - uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()); - __ LoadConst32(TMP, High32Bits(min_val)); - __ Mtc1(ZERO, FTMP); - __ Mthc1(TMP, FTMP); - __ CmpLeD(FTMP, FTMP, src); - } - - __ Bc1nez(FTMP, &truncate); - - if (input_type == Primitive::kPrimFloat) { - __ CmpEqS(FTMP, src, src); - } else { - __ CmpEqD(FTMP, src, src); - } - __ Move(dst_low, ZERO); - __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min()); - __ Mfc1(TMP, FTMP); - __ And(dst_high, dst_high, TMP); - - __ B(&done); - - __ Bind(&truncate); if (input_type == Primitive::kPrimFloat) { __ TruncLS(FTMP, src); @@ -8164,8 +8397,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } __ Mfc1(dst_low, FTMP); __ Mfhc1(dst_high, FTMP); - - __ Bind(&done); } else { QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l : kQuickD2l; @@ -8182,43 +8413,19 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi MipsLabel truncate; MipsLabel done; - // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate - // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, - // even though it must be NAN2008=1 on R6. 
- // - // For details see the large comment above for the truncation of float/double to long on R6. - // - // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the - // truncate instruction for MIPS64R6. - if (input_type == Primitive::kPrimFloat) { - uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); - __ LoadConst32(TMP, min_val); - __ Mtc1(TMP, FTMP); - } else { - uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); - __ LoadConst32(TMP, High32Bits(min_val)); - __ Mtc1(ZERO, FTMP); - __ MoveToFpuHigh(TMP, FTMP); - } - - if (isR6) { + if (!isR6) { if (input_type == Primitive::kPrimFloat) { - __ CmpLeS(FTMP, FTMP, src); + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); } else { - __ CmpLeD(FTMP, FTMP, src); + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + __ MoveToFpuHigh(TMP, FTMP); } - __ Bc1nez(FTMP, &truncate); if (input_type == Primitive::kPrimFloat) { - __ CmpEqS(FTMP, src, src); - } else { - __ CmpEqD(FTMP, src, src); - } - __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); - __ Mfc1(TMP, FTMP); - __ And(dst, dst, TMP); - } else { - if (input_type == Primitive::kPrimFloat) { __ ColeS(0, FTMP, src); } else { __ ColeD(0, FTMP, src); @@ -8232,11 +8439,11 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); __ Movf(dst, ZERO, 0); - } - __ B(&done); + __ B(&done); - __ Bind(&truncate); + __ Bind(&truncate); + } if (input_type == Primitive::kPrimFloat) { __ TruncWS(FTMP, src); @@ -8245,7 +8452,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } __ Mfc1(dst, FTMP); - __ Bind(&done); + if (!isR6) { + __ Bind(&done); + } } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 3875c4bdba..736b5070d9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -23,8 +23,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "string_reference.h" +#include "type_reference.h" #include "utils/mips/assembler_mips.h" -#include "utils/type_reference.h" namespace art { namespace mips { @@ -229,9 +229,10 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { // We switch to the table-based method starting with 7 cases. 
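Summarizing the float-to-integer hunks above: on R6 (NAN2008=1) the bare trunc.w/trunc.l instruction already yields the Java-mandated result (0 for NaN, saturation at the type bounds otherwise), so the compare-and-branch prologue is now emitted only on R2, and the R2 long case simply calls the kQuickF2l/kQuickD2l entrypoints. A portable C++ rendering of what the remaining R2 int path computes (the function name is illustrative; the explicit large-positive check only keeps the C++ model free of undefined behavior, since on R2 the hardware truncate already saturates there):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Mirrors the R2 (NAN2008=0) sequence for float -> int32: values below the
    // representable minimum and NaNs are handled explicitly, everything else is
    // left to the truncate instruction (modeled here as a cast).
    int32_t TruncateFloatToInt32_R2Model(float in) {
      const float min_val = static_cast<float>(std::numeric_limits<int32_t>::min());
      if (!(in >= min_val)) {  // ColeS/ColeD + branch: input is too small or a NaN
        // Comparing the input against itself separates NaN (-> 0) from "too small" (-> INT32_MIN).
        return std::isnan(in) ? 0 : std::numeric_limits<int32_t>::min();
      }
      if (in >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
        return std::numeric_limits<int32_t>::max();  // the hardware trunc saturates here on its own
      }
      return static_cast<int32_t>(in);  // trunc.w.s on the common path
    }

    int main() {
      assert(TruncateFloatToInt32_R2Model(std::nanf("")) == 0);
      assert(TruncateFloatToInt32_R2Model(-1e30f) == std::numeric_limits<int32_t>::min());
      assert(TruncateFloatToInt32_R2Model(1e30f) == std::numeric_limits<int32_t>::max());
      assert(TruncateFloatToInt32_R2Model(-3.7f) == -3);
      return 0;
    }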
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + void GenerateMemoryBarrier(MemBarrierKind kind); + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -294,6 +295,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); + void GenerateLongCompare(IfCondition cond, LocationSummary* locations); void GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); @@ -580,15 +582,13 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsLabel pc_rel_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); - Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index); - Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base); @@ -622,16 +622,8 @@ class CodeGeneratorMIPS : public CodeGenerator { Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; - using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; - using BootStringToLiteralMap = ArenaSafeMap<StringReference, - Literal*, - StringReferenceValueComparator>; - using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, - Literal*, - TypeReferenceValueComparator>; Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); @@ -653,16 +645,15 @@ class CodeGeneratorMIPS : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HMipsDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // Deduplication map for boot string literals for kBootImageLinkTimeAddress. - BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; - // Deduplication map for boot type literals for kBootImageLinkTimeAddress. - BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. 
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Patches for string root accesses in JIT compiled code. ArenaDeque<JitPatchInfo> jit_string_patches_; // Patches for class root accesses in JIT compiled code. diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 0459a033f8..6026814f04 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -141,7 +141,8 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} + explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) + : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -192,7 +193,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = instruction_->GetType(); - mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + mips64_codegen->MoveLocation(out, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + type); } RestoreLiveRegisters(codegen, locations); @@ -200,10 +203,6 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { DCHECK(out.IsValid()); - // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to - // kSaveEverything and use a temporary for the .bss entry address in the fast path, - // so that we can avoid another calculation here. - DCHECK_NE(out.AsRegister<GpuRegister>(), AT); CodeGeneratorMIPS64::PcRelativePatchInfo* info = mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -250,16 +249,13 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); mips64_codegen->MoveLocation(locations->Out(), - calling_convention.GetReturnLocation(type), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); RestoreLiveRegisters(codegen, locations); // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. 
GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - DCHECK_NE(out, AT); CodeGeneratorMIPS64::PcRelativePatchInfo* info = mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -306,10 +302,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { : SlowPathCodeMIPS64(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD. mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD. if (successor_ == nullptr) { __ Bc(GetReturnLabel()); } else { @@ -952,20 +951,17 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + assembler_(graph->GetArena(), &isa_features), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -1445,50 +1441,36 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + - boot_image_string_patches_.size() + - boot_image_type_patches_.size(); + pc_relative_string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, 
linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); - for (const auto& entry : boot_image_string_patches_) { - const StringReference& target_string = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - target_string.dex_file, - target_string.string_index.index_)); - } - for (const auto& entry : boot_image_type_patches_) { - const TypeReference& target_type = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - target_type.dex_file, - target_type.type_index.index_)); - } DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( @@ -1501,6 +1483,11 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPa return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -1524,27 +1511,6 @@ Literal* CodeGeneratorMIPS64::DeduplicateUint64Literal(uint64_t value) { [this, value]() { return __ NewLiteral<uint64_t>(value); }); } -Literal* CodeGeneratorMIPS64::DeduplicateMethodLiteral(MethodReference target_method, - MethodToLiteralMap* map) { - return map->GetOrCreate( - target_method, - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - -Literal* CodeGeneratorMIPS64::DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index) { - return boot_image_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - -Literal* CodeGeneratorMIPS64::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, - dex::TypeIndex type_index) { - return boot_image_type_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); -} - Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) { return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); } @@ 
-1590,14 +1556,20 @@ void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code, void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } @@ -1645,13 +1617,19 @@ size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg } size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index); - return kMips64DoublewordSize; + __ StoreFpuToOffset(GetGraph()->HasSIMD() ? kStoreQuadword : kStoreDoubleword, + FpuRegister(reg_id), + SP, + stack_index); + return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index); - return kMips64DoublewordSize; + __ LoadFpuFromOffset(GetGraph()->HasSIMD() ? kLoadQuadword : kLoadDoubleword, + FpuRegister(reg_id), + SP, + stack_index); + return GetFloatingPointSpillSlotSize(); } void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -1991,6 +1969,9 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(type)) { @@ -3990,6 +3971,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
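The SaveFloatingPointRegister/RestoreFloatingPointRegister hunk above widens FP spills to 128-bit quadwords whenever the graph contains SIMD, and the GetFloatingPointSpillSlotSize override in the header diff further down doubles the slot to 16 bytes so spill offsets advance in step; the VisitSuspendCheck change later in this diff forces those full-width spills because the runtime only preserves the lower part of each FP register. A small sketch of the sizing logic under those assumptions (all names below are illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    constexpr size_t kDoublewordSize = 8;   // scalar FP register width
    constexpr size_t kQuadwordSize = 16;    // full MSA vector register width

    struct FpRegisterModel {
      uint8_t bytes[kQuadwordSize];  // low 8 bytes = scalar view, all 16 = vector view
    };

    // Mirrors GetFloatingPointSpillSlotSize(): 16-byte slots only when the method uses SIMD.
    size_t FloatingPointSpillSlotSize(bool graph_has_simd) {
      return graph_has_simd ? kQuadwordSize : kDoublewordSize;
    }

    // Mirrors SaveFloatingPointRegister(): store a doubleword or a quadword and
    // report how much stack the spill consumed.
    size_t SaveFpRegister(uint8_t* stack, size_t stack_index, const FpRegisterModel& reg, bool graph_has_simd) {
      const size_t size = FloatingPointSpillSlotSize(graph_has_simd);
      std::memcpy(stack + stack_index, reg.bytes, size);
      return size;
    }

    int main() {
      uint8_t stack[64] = {};
      FpRegisterModel f0 = {};
      return SaveFpRegister(stack, 0, f0, /* graph_has_simd= */ true) == kQuadwordSize ? 0 : 1;
    }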
+ } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -4552,6 +4536,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instructi void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -4559,6 +4544,7 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -4568,6 +4554,9 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. @@ -4876,25 +4865,19 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { bool fallback_load = false; switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCacheViaMethod: - break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kRuntimeCall: + break; } if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; } return desired_string_load_kind; } @@ -4908,25 +4891,19 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kRuntimeCall: break; } if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; } return desired_class_load_kind; } @@ -4958,6 +4935,14 @@ void 
CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadLiteral(temp.AsRegister<GpuRegister>(), kLoadDoubleword, @@ -5083,12 +5068,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(Primitive::kPrimNot)); + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -5098,17 +5081,31 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } if (load_kind == HLoadClass::LoadKind::kReferrersClass) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not // move. 
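The kBootImageLinkTimePcRelative dispatch at the top of the hunk above (and its 32-bit counterpart earlier in this diff) materializes the method address as a PC-relative high half emitted by EmitPcRelativeAddressPlaceholderHigh plus a Daddiu/Addiu whose 0x5678 immediate is only a placeholder for the linker to overwrite. The split itself is the conventional hi/lo relocation trick; a hedged sketch of how a 32-bit offset divides so the sign-extended low half recombines exactly (whether ART applies precisely this adjustment is not visible in this hunk):

    #include <cassert>
    #include <cstdint>

    struct HiLo {
      uint16_t hi;  // goes into the high-half instruction emitted by the placeholder helper
      int16_t lo;   // goes into the addiu/daddiu immediate (sign-extended by hardware)
    };

    // Conventional %hi/%lo split: the +0x8000 rounding compensates for the
    // low half being sign-extended when the two are added back together.
    HiLo SplitOffset(int32_t offset) {
      uint32_t u = static_cast<uint32_t>(offset);
      return {static_cast<uint16_t>((u + 0x8000u) >> 16), static_cast<int16_t>(u & 0xFFFFu)};
    }

    int main() {
      for (int32_t offset : {0x12345678, 0x7FFF, 0x8000, -0x1234, -1}) {
        HiLo parts = SplitOffset(offset);
        uint32_t rebuilt = (uint32_t{parts.hi} << 16) +
                           static_cast<uint32_t>(static_cast<int32_t>(parts.lo));  // lo sign-extends
        assert(rebuilt == static_cast<uint32_t>(offset));
      }
      return 0;
    }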
void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -5119,7 +5116,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GpuRegister out = out_loc.AsRegister<GpuRegister>(); GpuRegister current_method_reg = ZERO; if (load_kind == HLoadClass::LoadKind::kReferrersClass || - load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + load_kind == HLoadClass::LoadKind::kRuntimeCall) { current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); } @@ -5138,14 +5135,6 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S ArtMethod::DeclaringClassOffset().Int32Value(), read_barrier_option); break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), - cls->GetTypeIndex())); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -5181,7 +5170,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S cls->GetClass())); GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -5230,11 +5219,22 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -5247,13 +5247,6 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA GpuRegister out = out_loc.AsRegister<GpuRegister>(); switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), - load->GetStringIndex())); - return; // No dex cache slow path. 
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info = @@ -5300,8 +5293,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA } // TODO: Re-add the compiler code to do string dex cache lookup again. - DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -5661,6 +5655,15 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5806,7 +5809,11 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet( void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores lower part). + locations->SetCustomSlowPathCallerSaves( + GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); } void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5933,68 +5940,6 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - Mips64Label truncate; - Mips64Label done; - - // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive - // value when the input is either a NaN or is outside of the range of the output type - // after the truncation. IOW, the three special cases (NaN, too small, too big) produce - // the same result. - // - // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum - // value of the output type if the input is outside of the range after the truncation or - // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct - // results. This matches the desired float/double-to-int/long conversion exactly. - // - // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. - // - // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate - // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, - // even though it must be NAN2008=1 on R6. 
- // - // The code takes care of the different behaviors by first comparing the input to the - // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int). - // If the input is greater than or equal to the minimum, it procedes to the truncate - // instruction, which will handle such an input the same way irrespective of NAN2008. - // Otherwise the input is compared to itself to determine whether it is a NaN or not - // in order to return either zero or the minimum value. - // - // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the - // truncate instruction for MIPS64R6. - if (input_type == Primitive::kPrimFloat) { - uint32_t min_val = (result_type == Primitive::kPrimLong) - ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()) - : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); - __ LoadConst32(TMP, min_val); - __ Mtc1(TMP, FTMP); - __ CmpLeS(FTMP, FTMP, src); - } else { - uint64_t min_val = (result_type == Primitive::kPrimLong) - ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()) - : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); - __ LoadConst64(TMP, min_val); - __ Dmtc1(TMP, FTMP); - __ CmpLeD(FTMP, FTMP, src); - } - - __ Bc1nez(FTMP, &truncate); - - if (input_type == Primitive::kPrimFloat) { - __ CmpEqS(FTMP, src, src); - } else { - __ CmpEqD(FTMP, src, src); - } - if (result_type == Primitive::kPrimLong) { - __ LoadConst64(dst, std::numeric_limits<int64_t>::min()); - } else { - __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); - } - __ Mfc1(TMP, FTMP); - __ And(dst, dst, TMP); - - __ Bc(&done); - - __ Bind(&truncate); if (result_type == Primitive::kPrimLong) { if (input_type == Primitive::kPrimFloat) { @@ -6011,8 +5956,6 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver } __ Mfc1(dst, FTMP); } - - __ Bind(&done); } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index fd1a174608..9c6b6f62cb 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -21,8 +21,8 @@ #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" +#include "type_reference.h" #include "utils/mips64/assembler_mips64.h" -#include "utils/type_reference.h" namespace art { namespace mips64 { @@ -226,9 +226,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { // We switch to the table-based method starting with 7 cases. 
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + void GenerateMemoryBarrier(MemBarrierKind kind); + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -313,6 +314,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { uint32_t num_entries, HBasicBlock* switch_block, HBasicBlock* default_block); + int32_t VecAddress(LocationSummary* locations, + size_t size, + /* out */ GpuRegister* adjusted_base); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; @@ -335,7 +339,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + return GetGraph()->HasSIMD() + ? 2 * kMips64DoublewordSize // 16 bytes for each spill. + : 1 * kMips64DoublewordSize; // 8 bytes for each spill. + } uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { return assembler_.GetLabelLocation(GetLabelOf(block)); @@ -540,17 +548,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Label pc_rel_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, uint32_t method_index); - Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index); - Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); Literal* DeduplicateBootImageAddressLiteral(uint64_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out); @@ -569,23 +575,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; - using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; using StringToLiteralMap = ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>; using TypeToLiteralMap = ArenaSafeMap<TypeReference, Literal*, TypeReferenceValueComparator>; - using BootStringToLiteralMap = ArenaSafeMap<StringReference, - Literal*, - StringReferenceValueComparator>; - using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, - Literal*, - TypeReferenceValueComparator>; Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateUint64Literal(uint64_t value); - Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, @@ -611,16 +609,15 @@ class CodeGeneratorMIPS64 : public 
CodeGenerator { Uint64ToLiteralMap uint64_literals_; // PC-relative patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // Deduplication map for boot string literals for kBootImageLinkTimeAddress. - BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; - // Deduplication map for boot type literals for kBootImageLinkTimeAddress. - BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; // Patches for class root accesses in JIT compiled code. diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 93befa439c..a41adca02c 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -22,6 +22,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) namespace art { namespace arm64 { +using helpers::DRegisterFrom; using helpers::VRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; @@ -467,7 +468,50 @@ void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); + } else { + __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); + } + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { @@ -475,7 +519,50 @@ void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { } void 
InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); + } else { + __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); + } + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { @@ -771,20 +858,28 @@ static void CreateVecMemLocations(ArenaAllocator* arena, } } -// Helper to set up registers and address for vector memory operations. -MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters( +// Helper to set up locations for vector memory operations. Returns the memory operand and, +// if used, sets the output parameter scratch to a temporary register used in this operand, +// so that the client can release it right after the memory operand use. +MemOperand InstructionCodeGeneratorARM64::VecAddress( HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load, - UseScratchRegisterScope* temps_scope) { + UseScratchRegisterScope* temps_scope, + size_t size, + bool is_string_char_at, + /*out*/ Register* scratch) { LocationSummary* locations = instruction->GetLocations(); Register base = InputRegisterAt(instruction, 0); - Location index = locations->InAt(1); - *reg_loc = is_load ? locations->Out() : locations->InAt(2); - Primitive::Type packed_type = instruction->GetPackedType(); - uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value(); - size_t shift = Primitive::ComponentSizeShift(packed_type); + if (instruction->InputAt(1)->IsIntermediateAddressIndex()) { + DCHECK(!is_string_char_at); + return MemOperand(base.X(), InputRegisterAt(instruction, 1).X()); + } + + Location index = locations->InAt(1); + uint32_t offset = is_string_char_at + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(size).Uint32Value(); + size_t shift = ComponentSizeShiftWidth(size); // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet. 
DCHECK(!instruction->InputAt(0)->IsIntermediateAddress()); @@ -793,10 +888,9 @@ MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters( offset += Int64ConstantFrom(index) << shift; return HeapOperand(base, offset); } else { - Register temp = temps_scope->AcquireSameSizeAs(base); - __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift)); - - return HeapOperand(temp, offset); + *scratch = temps_scope->AcquireSameSizeAs(base); + __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift)); + return HeapOperand(*scratch, offset); } } @@ -805,15 +899,43 @@ void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) { } void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { - Location reg_loc = Location::NoLocation(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VRegister reg = VRegisterFrom(locations->Out()); UseScratchRegisterScope temps(GetVIXLAssembler()); - MemOperand mem = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ true, &temps); - VRegister reg = VRegisterFrom(reg_loc); + Register scratch; switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + DCHECK_EQ(8u, instruction->GetVectorLength()); + // Special handling of compressed/uncompressed string load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + vixl::aarch64::Label uncompressed_load, done; + // Test compression bit. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Register length = temps.AcquireW(); + __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset)); + __ Tbnz(length.W(), 0, &uncompressed_load); + temps.Release(length); // no longer needed + // Zero extend 8 compressed bytes into 8 chars. + __ Ldr(DRegisterFrom(locations->Out()).V8B(), + VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch)); + __ Uxtl(reg.V8H(), reg.V8B()); + __ B(&done); + if (scratch.IsValid()) { + temps.Release(scratch); // if used, no longer needed + } + // Load 8 direct uncompressed chars. 
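For reference, the compressed-string handling in the hunk above reduces to testing bit 0 of String::count (0 = compressed 8-bit chars, 1 = uncompressed 16-bit chars, per the static_assert) and zero-extending each stored byte into a char. A minimal scalar sketch under that layout; the helper names are illustrative, not ART APIs:

#include <cstddef>
#include <cstdint>

// Bit 0 of the count field encodes the compression state.
inline bool IsCompressedCount(uint32_t count_field) {
  return (count_field & 1u) == 0u;  // 0 = compressed, 1 = uncompressed
}

// Scalar equivalent of the Uxtl widening: zero-extend n compressed bytes into n chars.
inline void WidenCompressedChars(const uint8_t* src, uint16_t* dst, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    dst[i] = src[i];
  }
}

The vector code performs the same widening eight lanes at a time; the uncompressed path resumes at the label bound just below.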
+ __ Bind(&uncompressed_load); + __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch)); + __ Bind(&done); + return; + } + FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: - case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimFloat: @@ -821,7 +943,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { case Primitive::kPrimDouble: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ Ldr(reg, mem); + __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -834,10 +956,11 @@ void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) { } void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { - Location reg_loc = Location::NoLocation(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VRegister reg = VRegisterFrom(locations->InAt(2)); UseScratchRegisterScope temps(GetVIXLAssembler()); - MemOperand mem = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ false, &temps); - VRegister reg = VRegisterFrom(reg_loc); + Register scratch; switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: @@ -850,7 +973,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { case Primitive::kPrimDouble: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ Str(reg, mem); + __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 50b95c17cb..af9e89e791 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -15,6 +15,7 @@ */ #include "code_generator_mips64.h" +#include "mirror/array-inl.h" namespace art { namespace mips64 { @@ -22,12 +23,72 @@ namespace mips64 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT +VectorRegister VectorRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()); + return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>()); +} + void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ ReplicateFPToVectorRegister(dst, + locations->InAt(0).AsFpuRegister<FpuRegister>(), + /* is_double */ false); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ ReplicateFPToVectorRegister(dst, + locations->InAt(0).AsFpuRegister<FpuRegister>(), + /* is_double */ true); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { @@ -51,13 +112,23 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + instruction->IsVecNot() ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + break; case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + (instruction->IsVecNeg() || instruction->IsVecAbs()) + ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -70,7 +141,18 @@ void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ffint_sW(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { @@ -78,7 +160,45 @@ void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, ZERO); + __ SubvB(dst, dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, ZERO); + __ SubvH(dst, dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); + __ SubvW(dst, dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); + __ SubvD(dst, dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); + __ FsubW(dst, dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); + __ FsubD(dst, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { @@ -86,7 +206,47 @@ void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, ZERO); // all zeroes + __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, ZERO); // all zeroes + __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); // all zeroes + __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); // all zeroes + __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) + break; + case 
Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ LdiW(dst, -1); // all ones + __ SrliW(dst, dst, 1); + __ AndV(dst, dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ LdiD(dst, -1); // all ones + __ SrliD(dst, dst, 1); + __ AndV(dst, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { @@ -94,7 +254,30 @@ void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ LdiB(dst, 1); + __ XorV(dst, dst, src); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ NorV(dst, src, src); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector binary operations. @@ -106,9 +289,12 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -121,7 +307,40 @@ void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ AddvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ AddvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ AddvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ AddvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FaddW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FaddD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -129,7 +348,40 @@ void 
LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Aver_uB(dst, lhs, rhs) + : __ Ave_uB(dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Aver_sB(dst, lhs, rhs) + : __ Ave_sB(dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Aver_uH(dst, lhs, rhs) + : __ Ave_uH(dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Aver_sH(dst, lhs, rhs) + : __ Ave_sH(dst, lhs, rhs); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { @@ -137,7 +389,40 @@ void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SubvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SubvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SubvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SubvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FsubW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FsubD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { @@ -145,7 +430,40 @@ void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ MulvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ MulvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ MulvW(dst, lhs, rhs); + break; + case 
Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ MulvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FmulW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FmulD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { @@ -153,7 +471,23 @@ void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FdivW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FdivD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) { @@ -177,7 +511,27 @@ void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ AndV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { @@ -193,7 +547,27 @@ void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ OrV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { @@ -201,7 +575,27 @@ void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { } void 
InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ XorV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector shift operations. @@ -213,7 +607,9 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -226,7 +622,32 @@ void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SlliB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SlliH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SlliW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SlliD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { @@ -234,7 +655,32 @@ void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SraiB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SraiH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SraiW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, 
instruction->GetVectorLength()); + __ SraiD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { @@ -242,7 +688,32 @@ void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SrliB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SrliH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SrliW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SrliD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { @@ -253,20 +724,143 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu LOG(FATAL) << "No SIMD for " << instr->GetId(); } +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to prepare register and offset for vector memory operations. Returns the offset and sets +// the output parameter adjusted_base to the original base or to a reserved temporary register (AT). 
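A scalar model of the split performed by the definition that follows: for a constant index the scaled element displacement is folded into the returned offset, while for a register index the scaled index is pre-added into an adjusted base (AT in the real code). The names below are illustrative only and omit the large-offset adjustment:

#include <cstddef>
#include <cstdint>

struct SplitAddress {
  uintptr_t adjusted_base;  // original base, or base + (index << shift)
  int32_t offset;           // array data offset, plus any folded constant displacement
};

inline SplitAddress SplitVecAddress(uintptr_t base, bool index_is_constant, int64_t index,
                                    size_t component_size, int32_t data_offset) {
  int shift = (component_size == 2) ? 1 : (component_size == 4) ? 2
            : (component_size == 8) ? 3 : 0;
  if (index_is_constant) {
    return { base, static_cast<int32_t>(data_offset + (index << shift)) };
  }
  return { base + (static_cast<uint64_t>(index) << shift), data_offset };
}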
+int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, + size_t size, + /* out */ GpuRegister* adjusted_base) { + GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>(); + Location index = locations->InAt(1); + int scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + int32_t offset = mirror::Array::DataOffset(size).Int32Value(); + + if (index.IsConstant()) { + offset += index.GetConstant()->AsIntConstant()->GetValue() << scale; + __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale); + *adjusted_base = base; + } else { + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + if (scale != TIMES_1) { + __ Dlsa(AT, index_reg, base, scale); + } else { + __ Daddu(AT, base, index_reg); + } + *adjusted_base = AT; + } + return offset; +} + void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true); } void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VectorRegister reg = VectorRegisterFrom(locations->Out()); + GpuRegister base; + int32_t offset = VecAddress(locations, size, &base); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ LdB(reg, base, offset); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + // Loading 8-bytes (needed if dealing with compressed strings in StringCharAt) from unaligned + // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned + // loads and stores. + // TODO: Implement support for StringCharAt. 
+ DCHECK(!instruction->IsStringCharAt()); + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ LdH(reg, base, offset); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ LdW(reg, base, offset); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ LdD(reg, base, offset); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false); } void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); + GpuRegister base; + int32_t offset = VecAddress(locations, size, &base); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ StB(reg, base, offset); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ StH(reg, base, offset); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ StW(reg, base, offset); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ StD(reg, base, offset); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } #undef __ diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 013b092b5a..14782d70a1 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -201,6 +201,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Integral-abs requires a temporary for the comparison. 
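The comment above notes that integral abs needs a temporary for a comparison; the locations code just below reserves an FPU temporary for it. A generic branch-free scalar formulation of that idea, shown as a sketch rather than the exact instruction sequence ART emits:

#include <cstdint>

inline int32_t AbsViaSignMask(int32_t x) {
  // All-ones when x is negative, zero otherwise (arithmetic shift, as on x86).
  uint32_t mask = static_cast<uint32_t>(x >> 31);
  uint32_t ux = static_cast<uint32_t>(x);
  // (x ^ mask) - mask leaves x unchanged when mask == 0 and negates it (two's complement)
  // when mask is all-ones; unsigned arithmetic wraps the INT_MIN lane just like SIMD does.
  return static_cast<int32_t>((ux ^ mask) - mask);
}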
if (instruction->GetPackedType() == Primitive::kPrimInt) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } @@ -482,7 +483,51 @@ void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminub(dst, src); + } else { + __ pminsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminuw(dst, src); + } else { + __ pminsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminud(dst, src); + } else { + __ pminsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { @@ -490,7 +535,51 @@ void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxub(dst, src); + } else { + __ pmaxsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxuw(dst, src); + } else { + __ pmaxsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxud(dst, src); + } else { + __ pmaxsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { @@ -766,16 +855,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena, } } -// Helper to set up registers and address for vector memory operations. 
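The 'sloppy wrt 0.0 vs -0.0' remark in these min/max visitors reflects the underlying minps/minpd and maxps/maxpd semantics: when the operands compare equal (for example +0.0 and -0.0), or when a NaN is involved, the second operand is returned, so the sign of a zero result depends on operand order. A scalar model of that per-lane rule:

inline float PackedMinModel(float a, float b) {
  // Falls through to b for equal operands (including +0.0 vs -0.0) and for NaNs,
  // matching the minps/minpd lane behavior.
  return a < b ? a : b;
}

inline float PackedMaxModel(float a, float b) {
  return a > b ? a : b;  // same asymmetry as maxps/maxpd
}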
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load) { - LocationSummary* locations = instruction->GetLocations(); +// Helper to construct address for vector memory operations. +static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { Location base = locations->InAt(0); Location index = locations->InAt(1); - *reg_loc = is_load ? locations->Out() : locations->InAt(2); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); - uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); ScaleFactor scale = TIMES_1; switch (size) { case 2: scale = TIMES_2; break; @@ -783,22 +866,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, case 8: scale = TIMES_8; break; default: break; } + uint32_t offset = is_string_char_at + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(size).Uint32Value(); return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset); } void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + // String load requires a temporary for the compressed load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } } void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ true); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, instruction->IsStringCharAt()); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + DCHECK_EQ(8u, instruction->GetVectorLength()); + // Special handling of compressed/uncompressed string load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + NearLabel done, not_compressed; + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // Test compression bit. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1)); + __ j(kNotZero, ¬_compressed); + // Zero extend 8 compressed bytes into 8 chars. + __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ pxor(tmp, tmp); + __ punpcklbw(reg, tmp); + __ jmp(&done); + // Load 4 direct uncompressed chars. + __ Bind(¬_compressed); + is_aligned16 ? 
__ movdqa(reg, address) : __ movdqu(reg, address); + __ Bind(&done); + return; + } + FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: - case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: @@ -825,9 +939,10 @@ void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { } void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ false); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, /*is_string_char_at*/ false); + XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 66f19a4376..246044ebb8 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -194,6 +194,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Integral-abs requires a temporary for the comparison. if (instruction->GetPackedType() == Primitive::kPrimInt) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } @@ -352,6 +353,10 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK(instruction->IsRounded()); + DCHECK(instruction->IsUnsigned()); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -471,7 +476,51 @@ void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminub(dst, src); + } else { + __ pminsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminuw(dst, src); + } else { + __ pminsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminud(dst, src); + } else { + __ pminsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. 
+ case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { @@ -479,7 +528,51 @@ void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxub(dst, src); + } else { + __ pmaxsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxuw(dst, src); + } else { + __ pmaxsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxud(dst, src); + } else { + __ pmaxsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { @@ -755,16 +848,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena, } } -// Helper to set up registers and address for vector memory operations. -static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load) { - LocationSummary* locations = instruction->GetLocations(); +// Helper to construct address for vector memory operations. +static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { Location base = locations->InAt(0); Location index = locations->InAt(1); - *reg_loc = is_load ? locations->Out() : locations->InAt(2); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); - uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); ScaleFactor scale = TIMES_1; switch (size) { case 2: scale = TIMES_2; break; @@ -772,22 +859,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, case 8: scale = TIMES_8; break; default: break; } + uint32_t offset = is_string_char_at + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(size).Uint32Value(); return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); } void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + // String load requires a temporary for the compressed load. 
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } } void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ true); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, instruction->IsStringCharAt()); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + DCHECK_EQ(8u, instruction->GetVectorLength()); + // Special handling of compressed/uncompressed string load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + NearLabel done, not_compressed; + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // Test compression bit. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1)); + __ j(kNotZero, ¬_compressed); + // Zero extend 8 compressed bytes into 8 chars. + __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ pxor(tmp, tmp); + __ punpcklbw(reg, tmp); + __ jmp(&done); + // Load 8 direct uncompressed chars. + __ Bind(¬_compressed); + is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); + __ Bind(&done); + return; + } + FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: - case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: @@ -814,9 +932,10 @@ void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ false); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, /*is_string_char_at*/ false); + XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 7e640c284f..b8465cd9d5 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -26,6 +26,7 @@ #include "intrinsics_x86.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" +#include "lock_word.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -1032,9 +1033,10 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, assembler_(graph->GetArena()), isa_features_(isa_features), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + 
boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -2066,6 +2068,15 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant // Will be generated at use site. } +void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -2158,7 +2169,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; @@ -2167,7 +2178,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok HandleInvoke(invoke); // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. - if (invoke->HasPcRelativeDexCache()) { + if (invoke->HasPcRelativeMethodLoadKind()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } } @@ -4510,18 +4521,16 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr // save one load. However, since this is just an intrinsic slow path we prefer this // simple and more robust approach rather that trying to determine if that's the case. SlowPathCode* slow_path = GetCurrentSlowPath(); - if (slow_path != nullptr) { - if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { - int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); - __ movl(temp, Address(ESP, stack_offset)); - return temp; - } + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ movl(temp, Address(ESP, stack_offset)); + return temp; } return location.AsRegister<Register>(); } -Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
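The VisitConstructorFence handler added above lowers HConstructorFence to a store-store barrier so that the stores initializing an object cannot be reordered past the store that publishes its reference; on x86 the hardware already orders stores, so the emitted barrier is typically just a scheduling constraint. A rough C++ analogy of the guarantee, not ART code:

#include <atomic>

struct Box { int value; };
std::atomic<Box*> g_published{nullptr};

void ConstructAndPublish(Box* b) {
  b->value = 42;                                        // constructor writes
  std::atomic_thread_fence(std::memory_order_release);  // plays the role of the StoreStore fence
  g_published.store(b, std::memory_order_relaxed);      // publication of the reference
}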
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -4534,6 +4543,14 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootMethodPatch(invoke); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; @@ -4571,11 +4588,6 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO break; } } - return callee_method; -} - -void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -4622,27 +4634,18 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); } -void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = nullptr; - if (GetCompilerOptions().GetCompilePic()) { - address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - } else { - DCHECK_EQ(load_string->InputCount(), 0u); - } - string_patches_.emplace_back(address, - load_string->GetDexFile(), - load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + boot_image_method_patches_.emplace_back(address, + *invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); + __ Bind(&boot_image_method_patches_.back().label); } void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = nullptr; - if (GetCompilerOptions().GetCompilePic()) { - address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); - } else { - DCHECK_EQ(load_class->InputCount(), 0u); - } + HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); boot_image_type_patches_.emplace_back(address, load_class->GetDexFile(), load_class->GetTypeIndex().index_); @@ -4657,6 +4660,15 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { return &type_bss_entry_patches_.back().label; } +void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); + HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + string_patches_.emplace_back(address, + load_string->GetDexFile(), + load_string->GetStringIndex().index_); + __ Bind(&string_patches_.back().label); +} + Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { 
DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = @@ -4694,29 +4706,23 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - string_patches_.size() + + boot_image_method_patches_.size() + boot_image_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); - } else if (GetCompilerOptions().GetCompilePic()) { + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { - for (const PatchInfo<Label>& info : boot_image_type_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index)); - } - for (const PatchInfo<Label>& info : string_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back( - LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index)); - } + DCHECK(boot_image_method_patches_.empty()); + DCHECK(boot_image_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); @@ -6045,21 +6051,15 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - FALLTHROUGH_INTENDED; case HLoadClass::LoadKind::kBssEntry: - DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT. - break; - case HLoadClass::LoadKind::kBootImageAddress: + DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -6067,7 +6067,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -6121,7 +6121,7 @@ Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, // move. 
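// Illustrative aside — a minimal stand-alone sketch, not ART code (SimplePatch,
// PatchSite, EmitPcRelativePatches and EmitAll are invented stand-ins): the new
// boot_image_method_patches_ deque above is drained by EmitLinkerPatches through
// one templated loop whose patch factory (LinkerPatch::RelativeMethodPatch,
// RelativeTypePatch, RelativeStringPatch, ...) is a non-type template parameter.
#include <cstddef>
#include <cstdint>
#include <vector>

struct SimplePatch { size_t literal_offset; uint32_t pc_insn_offset; uint32_t target_index; };
struct PatchSite { size_t literal_offset; uint32_t pc_insn_offset; uint32_t target_index; };

// One loop serves every pc-relative patch kind; only the factory changes.
template <SimplePatch (*Factory)(size_t, uint32_t, uint32_t)>
void EmitPcRelativePatches(const std::vector<PatchSite>& sites,
                           std::vector<SimplePatch>* linker_patches) {
  for (const PatchSite& site : sites) {
    linker_patches->push_back(Factory(site.literal_offset, site.pc_insn_offset, site.target_index));
  }
}

SimplePatch RelativeMethodPatch(size_t literal_offset, uint32_t pc_insn_offset, uint32_t method_idx) {
  return {literal_offset, pc_insn_offset, method_idx};  // displacement rewritten at link time
}

void EmitAll(const std::vector<PatchSite>& boot_image_method_sites,
             std::vector<SimplePatch>* linker_patches) {
  EmitPcRelativePatches<RelativeMethodPatch>(boot_image_method_sites, linker_patches);  // usage
}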
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -6149,13 +6149,6 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE read_barrier_option); break; } - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ movl(out, Immediate(/* placeholder */ 0)); - codegen_->RecordBootTypePatch(cls); - break; - } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -6188,7 +6181,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -6243,21 +6236,15 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - FALLTHROUGH_INTENDED; case HLoadString::LoadKind::kBssEntry: - DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT. - break; - case HLoadString::LoadKind::kBootImageAddress: + DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -6271,7 +6258,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(EAX)); } else { locations->SetOut(Location::RequiresRegister()); @@ -6308,12 +6295,6 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Register out = out_loc.AsRegister<Register>(); switch (load->GetLoadKind()) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - __ movl(out, Immediate(/* placeholder */ 0)); - codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. - } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); @@ -7694,7 +7675,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { constant_area_start_ = assembler->CodeSize(); // Populate any jump tables. 
- for (auto jump_table : fixups_to_jump_tables_) { + for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) { jump_table->CreateJumpTable(); } @@ -7833,17 +7814,19 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - const auto& it = jit_string_roots_.find( + const auto it = jit_string_roots_.find( StringReference(&info.dex_file, dex::StringIndex(info.index))); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - const auto& it = jit_class_roots_.find( + const auto it = jit_class_roots_.find( TypeReference(&info.dex_file, dex::TypeIndex(info.index))); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index ca3a9eadd2..8130bd9d25 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -408,14 +408,14 @@ class CodeGeneratorX86 : public CodeGenerator { HInvokeStaticOrDirect* invoke) OVERRIDE; // Generate a call to a static or direct method. - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; // Generate a call to a virtual method. void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); + void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address, const DexFile& dex_file, @@ -633,16 +633,17 @@ class CodeGeneratorX86 : public CodeGenerator { // PC-relative DexCache access info. ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC). - ArenaDeque<X86PcRelativePatchInfo> string_patches_; - // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC). + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; + // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; + // String patch locations; type depends on configuration (app .bss or boot image). + ArenaDeque<X86PcRelativePatchInfo> string_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; - // Patches for class root accesses in JIT compiled code. 
ArenaDeque<PatchInfo<Label>> jit_class_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dfb11aaba5..8dde298267 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -23,6 +23,7 @@ #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" +#include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/object_reference.h" @@ -976,9 +977,10 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati return desired_dispatch_info; } -Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp) { // All registers are assumed to be correctly set up. + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -991,6 +993,12 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().IsBootImage()); + __ leal(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + RecordBootMethodPatch(invoke); + break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; @@ -1025,13 +1033,6 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat break; } } - return callee_method; -} - -void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { - // All registers are assumed to be correctly set up. 
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -1079,10 +1080,10 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t kX86_64PointerSize).SizeValue())); } -void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { + boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); + __ Bind(&boot_image_method_patches_.back().label); } void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { @@ -1096,6 +1097,12 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { return &type_bss_entry_patches_.back().label; } +void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&string_patches_.back().label); +} + Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); @@ -1128,20 +1135,23 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - string_patches_.size() + + boot_image_method_patches_.size() + boot_image_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); - } else { - // These are always PC-relative, see GetSupportedLoadClassKind()/GetSupportedLoadStringKind(). 
+ if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); + } else { + DCHECK(boot_image_method_patches_.empty()); + DCHECK(boot_image_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); @@ -1232,12 +1242,13 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, isa_features_(isa_features), constant_area_start_(0), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -2174,6 +2185,15 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant( // Will be generated at use site. } +void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5449,22 +5469,15 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: break; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - // We prefer the always-available RIP-relative address for the x86-64 boot image. 
- return HLoadClass::LoadKind::kBootImageLinkTimePcRelative; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -5472,7 +5485,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { // Custom calling convention: RAX serves as both input and output. CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -5523,7 +5536,7 @@ Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, // move. void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -5626,22 +5639,15 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!GetCompilerOptions().GetCompilePic()); - // We prefer the always-available RIP-relative address for the x86-64 boot image. - return HLoadString::LoadKind::kBootImageLinkTimePcRelative; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().GetCompilePic()); - break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -5650,7 +5656,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(RAX)); } else { locations->SetOut(Location::RequiresRegister()); @@ -7046,7 +7052,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { constant_area_start_ = assembler->CodeSize(); // Populate any jump tables. 
- for (auto jump_table : fixups_to_jump_tables_) { + for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) { jump_table->CreateJumpTable(); } @@ -7140,17 +7146,19 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - const auto& it = jit_string_roots_.find( + const auto it = jit_string_roots_.find( StringReference(&info.dex_file, dex::StringIndex(info.index))); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - const auto& it = jit_class_roots_.find( + const auto it = jit_class_roots_.find( TypeReference(&info.dex_file, dex::TypeIndex(info.index))); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c8336dabd9..25479814d0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -404,13 +404,13 @@ class CodeGeneratorX86_64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); + void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, @@ -603,22 +603,23 @@ class CodeGeneratorX86_64 : public CodeGenerator { // PC-relative DexCache access info. ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; - // String patch locations; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PatchInfo<Label>> string_patches_; - // Type patch locations for boot image (always PIC). + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; + // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - - // Fixups for jump tables need to be handled specially. - ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + // String patch locations; type depends on configuration (app .bss or boot image). + ArenaDeque<PatchInfo<Label>> string_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; - // Patches for class literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_class_patches_; + // Fixups for jump tables need to be handled specially. 
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 0b4dcd30a1..e598e19b67 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -56,6 +56,17 @@ static bool IsInterestingInstruction(HInstruction* instruction) { return true; } + // Check it is safe to move ConstructorFence. + // (Safe to move ConstructorFence for only protecting the new-instance but not for finals.) + if (instruction->IsConstructorFence()) { + HConstructorFence* ctor_fence = instruction->AsConstructorFence(); + + // A fence with "0" inputs is dead and should've been removed in a prior pass. + DCHECK_NE(0u, ctor_fence->InputCount()); + + return ctor_fence->GetAssociatedAllocation() != nullptr; + } + // All other instructions that can throw cannot be moved. if (instruction->CanThrow()) { return false; @@ -134,11 +145,11 @@ static bool ShouldFilterUse(HInstruction* instruction, HInstruction* user, const ArenaBitVector& post_dominated) { if (instruction->IsNewInstance()) { - return user->IsInstanceFieldSet() && + return (user->IsInstanceFieldSet() || user->IsConstructorFence()) && (user->InputAt(0) == instruction) && !post_dominated.IsBitSet(user->GetBlock()->GetBlockId()); } else if (instruction->IsNewArray()) { - return user->IsArraySet() && + return (user->IsArraySet() || user->IsConstructorFence()) && (user->InputAt(0) == instruction) && !post_dominated.IsBitSet(user->GetBlock()->GetBlockId()); } @@ -372,7 +383,9 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { // Step (3): Try to move sinking candidates. for (HInstruction* instruction : move_in_order) { HInstruction* position = nullptr; - if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) { + if (instruction->IsArraySet() + || instruction->IsInstanceFieldSet() + || instruction->IsConstructorFence()) { if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) { // A store can trivially move, but it can safely do so only if the heap // location it stores to can also move. 
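The code_sinking.cc changes above let an HConstructorFence sink together with the allocation it protects. As a rough stand-alone illustration of the property such a store-store fence preserves at runtime (not ART code; Box, g_shared and the two functions are invented, and C++11 atomics stand in for the generated kStoreStore barrier): the writes that default-initialize an object, including its class header, must become visible before the reference is published to another thread (JLS 17.4.5).

#include <atomic>

struct Box { int value = 0; };

std::atomic<Box*> g_shared{nullptr};

void ConstructAndPublish() {
  Box* b = new Box();                                    // construction: header + field writes
  b->value = 42;
  std::atomic_thread_fence(std::memory_order_release);   // plays the role of HConstructorFence
  g_shared.store(b, std::memory_order_relaxed);          // the reference escapes here
}

Box* Observe() {
  // A reader that sees a non-null pointer also sees the writes made before the fence.
  return g_shared.load(std::memory_order_acquire);
}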
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 4ba5c5580f..fe25b7690d 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { #endif }; - for (auto test_config : test_config_candidates) { + for (const CodegenTargetConfig& test_config : test_config_candidates) { if (CanExecute(test_config.GetInstructionSet())) { v.push_back(test_config); } @@ -76,7 +76,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { - for (CodegenTargetConfig target_config : GetTargetConfigs()) { + for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateCFG(&arena, data); @@ -89,7 +89,7 @@ static void TestCode(const uint16_t* data, static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { - for (CodegenTargetConfig target_config : GetTargetConfigs()) { + for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong); @@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { // // Assertion failed (!available->IsEmpty()) // - // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable. + // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable, + // because of the following situation: + // + // 1. a temp register (IP0) is allocated as a scratch register by + // the parallel move resolver to solve a cycle (swap): + // + // [ source=DS0 destination=DS257 type=PrimDouble instruction=null ] + // [ source=DS257 destination=DS0 type=PrimDouble instruction=null ] + // + // 2. within CodeGeneratorARM64::MoveLocation, another temp + // register (IP1) is allocated to generate the swap between two + // double stack slots; + // + // 3. VIXL requires a third temp register to emit the `Ldr` or + // `Str` operation from CodeGeneratorARM64::MoveLocation (as + // one of the stack slots' offsets cannot be encoded as an + // immediate), but the pool of (core) temp registers is now + // empty. + // + // The solution used so far is to use a floating-point temp register + // (D31) in step #2, so that IP1 is available for step #3. 
+ HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena()); move->AddMove(Location::DoubleStackSlot(0), Location::DoubleStackSlot(257), @@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { InternalCodeAllocator code_allocator; codegen.Finalize(&code_allocator); } - #endif #ifdef ART_ENABLE_CODEGEN_mips diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 31cd204c9f..00a16fe849 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -243,7 +243,7 @@ static void ValidateGraph(HGraph* graph) { GraphChecker graph_checker(graph); graph_checker.Run(); if (!graph_checker.IsValid()) { - for (const auto& error : graph_checker.GetErrors()) { + for (const std::string& error : graph_checker.GetErrors()) { std::cout << error << std::endl; } } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 34b52a87b5..aea901dec7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -338,14 +338,21 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { // Ensure the inputs of `instruction` are defined in a block of the graph. for (HInstruction* input : instruction->GetInputs()) { - const HInstructionList& list = input->IsPhi() - ? input->GetBlock()->GetPhis() - : input->GetBlock()->GetInstructions(); - if (!list.Contains(input)) { - AddError(StringPrintf("Input %d of instruction %d is not defined " - "in a basic block of the control-flow graph.", + if (input->GetBlock() == nullptr) { + AddError(StringPrintf("Input %d of instruction %d is not in any " + "basic block of the control-flow graph.", input->GetId(), instruction->GetId())); + } else { + const HInstructionList& list = input->IsPhi() + ? 
input->GetBlock()->GetPhis() + : input->GetBlock()->GetInstructions(); + if (!list.Contains(input)) { + AddError(StringPrintf("Input %d of instruction %d is not defined " + "in a basic block of the control-flow graph.", + input->GetId(), + instruction->GetId())); + } } } @@ -497,8 +504,7 @@ void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { "has a null pointer as last input.", invoke->DebugName(), invoke->GetId())); - } - if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) { + } else if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) { AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " "has a last instruction (%s:%d) which is neither a clinit check " "nor a load class instruction.", diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index e5d94c3504..02816cf7ce 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -514,6 +514,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } + void VisitVecMin(HVecMin* min) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha; + } + + void VisitVecMax(HVecMax* max) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha; + } + void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c93bc210be..8ea312d0ea 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -516,13 +516,13 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const { DCHECK(visited_blocks_.IsBitSet(block->GetBlockId())); - for (auto dominated_block : block->GetDominatedBlocks()) { + for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) { if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) { return true; } } - for (auto successor : block->GetSuccessors()) { + for (const HBasicBlock* successor : block->GetSuccessors()) { if (!visited_blocks_.IsBitSet(successor->GetBlockId())) { return true; } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 4f6ca17de0..142c95780e 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -140,6 +140,14 @@ void HInliner::Run() { DCHECK_NE(total_number_of_instructions_, 0u); DCHECK_NE(inlining_budget_, 0u); + // If we're compiling with a core image (which is only used for + // test purposes), honor inlining directives in method names: + // - if a method's name contains the substring "$inline$", ensure + // that this method is actually inlined; + // - if a method's name contains the substring "$noinline$", do not + // inline that method. + const bool honor_inlining_directives = IsCompilingWithCoreImage(); + // Keep a copy of all blocks when starting the visit. ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); DCHECK(!blocks.empty()); @@ -152,7 +160,7 @@ void HInliner::Run() { HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. 
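// Illustrative sketch, not ART code (MustInline / MustNotInline are invented
// helpers): honoring the "$inline$" / "$noinline$" directives described above
// amounts to a substring test on the callee's pretty-printed method name.
#include <string>

bool MustInline(const std::string& callee_name) {
  return callee_name.find("$inline$") != std::string::npos;
}

bool MustNotInline(const std::string& callee_name) {
  return callee_name.find("$noinline$") != std::string::npos;
}

// e.g. MustInline("Main.$inline$add") is true, so a core-image (test) build can
// assert that such a callee really was inlined, and that no "$noinline$" callee was.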
if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { - if (kIsDebugBuild && IsCompilingWithCoreImage()) { + if (honor_inlining_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod( call->GetDexMethodIndex(), /* with_signature */ false); @@ -1501,8 +1509,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, } } if (needs_constructor_barrier) { - HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc); - invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction); + // See CompilerDriver::RequiresConstructorBarrier for more details. + DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence"; + + HConstructorFence* constructor_fence = + new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena()); + invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence, + invoke_instruction); } *return_replacement = nullptr; break; @@ -1870,7 +1883,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_); - InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); + InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); HOptimization* optimizations[] = { diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 978c6a2d71..df9e7164ed 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -451,10 +451,13 @@ void HInstructionBuilder::InitializeParameters() { referrer_method_id.class_idx_, parameter_index++, Primitive::kPrimNot, - true); + /* is_this */ true); AppendInstruction(parameter); UpdateLocal(locals_index++, parameter); number_of_parameters--; + current_this_parameter_ = parameter; + } else { + DCHECK(current_this_parameter_ == nullptr); } const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); @@ -465,7 +468,7 @@ void HInstructionBuilder::InitializeParameters() { arg_types->GetTypeItem(shorty_pos - 1).type_idx_, parameter_index++, Primitive::GetType(shorty[shorty_pos]), - false); + /* is_this */ false); ++shorty_pos; AppendInstruction(parameter); // Store the parameter value in the local that the dex code will use @@ -588,6 +591,8 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } +// Does the method being compiled need any constructor barriers being inserted? +// (Always 'false' for methods that aren't <init>.) static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) { // Can be null in unit tests only. if (UNLIKELY(cu == nullptr)) { @@ -596,6 +601,11 @@ static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDri Thread* self = Thread::Current(); return cu->IsConstructor() + && !cu->IsStatic() + // RequiresConstructorBarrier must only be queried for <init> methods; + // it's effectively "false" for every other method. + // + // See CompilerDriver::RequiresConstructBarrier for more explanation. 
&& driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); } @@ -639,13 +649,24 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc) { if (type == Primitive::kPrimVoid) { + // Only <init> (which is a return-void) could possibly have a constructor fence. // This may insert additional redundant constructor fences from the super constructors. // TODO: remove redundant constructor fences (b/36656456). if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) { - AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc)); + // Compiling instance constructor. + if (kIsDebugBuild) { + std::string method_name = graph_->GetMethodName(); + CHECK_EQ(std::string("<init>"), method_name); + } + + HInstruction* fence_target = current_this_parameter_; + DCHECK(fence_target != nullptr); + + AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_)); } AppendInstruction(new (arena_) HReturnVoid(dex_pc)); } else { + DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); AppendInstruction(new (arena_) HReturn(value, dex_pc)); } @@ -941,7 +962,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction false /* is_unresolved */); } -bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) { +HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); HLoadClass* load_class = BuildLoadClass(type_index, dex_pc); @@ -965,14 +986,65 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d // Consider classes we haven't resolved as potentially finalizable. bool finalizable = (klass == nullptr) || klass->IsFinalizable(); - AppendInstruction(new (arena_) HNewInstance( + HNewInstance* new_instance = new (arena_) HNewInstance( cls, dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), finalizable, - entrypoint)); - return true; + entrypoint); + AppendInstruction(new_instance); + + return new_instance; +} + +void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) { + DCHECK(allocation != nullptr && + (allocation->IsNewInstance() || + allocation->IsNewArray())); // corresponding to "new" keyword in JLS. + + if (allocation->IsNewInstance()) { + // STRING SPECIAL HANDLING: + // ------------------------------- + // Strings have a real HNewInstance node but they end up always having 0 uses. + // All uses of a String HNewInstance are always transformed to replace their input + // of the HNewInstance with an input of the invoke to StringFactory. + // + // Do not emit an HConstructorFence here since it can inhibit some String new-instance + // optimizations (to pass checker tests that rely on those optimizations). + HNewInstance* new_inst = allocation->AsNewInstance(); + HLoadClass* load_class = new_inst->GetLoadClass(); + + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + StackHandleScope<1> hs(self); + Handle<mirror::Class> klass = load_class->GetClass(); + if (klass != nullptr && klass->IsStringClass()) { + return; + // Note: Do not use allocation->IsStringAlloc which requires + // a valid ReferenceTypeInfo, but that doesn't get made until after reference type + // propagation (and instruction builder is too early). 
+ } + // (In terms of correctness, the StringFactory needs to provide its own + // default initialization barrier, see below.) + } + + // JLS 17.4.5 "Happens-before Order" describes: + // + // The default initialization of any object happens-before any other actions (other than + // default-writes) of a program. + // + // In our implementation the default initialization of an object to type T means + // setting all of its initial data (object[0..size)) to 0, and setting the + // object's class header (i.e. object.getClass() == T.class). + // + // In practice this fence ensures that the writes to the object header + // are visible to other threads if this object escapes the current thread. + // (and in theory the 0-initializing, but that happens automatically + // when new memory pages are mapped in by the OS). + HConstructorFence* ctor_fence = + new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_); + AppendInstruction(ctor_fence); } static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) @@ -1501,15 +1573,15 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction, graph_->SetHasBoundsChecks(true); } -void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, - dex::TypeIndex type_index, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { +HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, + dex::TypeIndex type_index, + uint32_t number_of_vreg_arguments, + bool is_range, + uint32_t* args, + uint32_t register_index) { HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); HLoadClass* cls = BuildLoadClass(type_index, dex_pc); - HInstruction* object = new (arena_) HNewArray(cls, length, dex_pc); + HNewArray* const object = new (arena_) HNewArray(cls, length, dex_pc); AppendInstruction(object); const char* descriptor = dex_file_->StringByTypeIdx(type_index); @@ -1529,6 +1601,8 @@ void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, AppendInstruction(aset); } latest_result_ = object; + + return object; } template <typename T> @@ -2513,10 +2587,12 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } case Instruction::NEW_INSTANCE: { - if (!BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc)) { - return false; - } + HNewInstance* new_instance = + BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc); + DCHECK(new_instance != nullptr); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + BuildConstructorFenceForAllocation(new_instance); break; } @@ -2524,8 +2600,11 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, dex::TypeIndex type_index(instruction.VRegC_22c()); HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt); HLoadClass* cls = BuildLoadClass(type_index, dex_pc); - AppendInstruction(new (arena_) HNewArray(cls, length, dex_pc)); + + HNewArray* new_array = new (arena_) HNewArray(cls, length, dex_pc); + AppendInstruction(new_array); UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction()); + BuildConstructorFenceForAllocation(new_array); break; } @@ -2534,7 +2613,13 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, dex::TypeIndex type_index(instruction.VRegB_35c()); uint32_t args[5]; instruction.GetVarArgs(args); - BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0); + HNewArray* new_array = 
BuildFilledNewArray(dex_pc, + type_index, + number_of_vreg_arguments, + /* is_range */ false, + args, + /* register_index */ 0); + BuildConstructorFenceForAllocation(new_array); break; } @@ -2542,8 +2627,13 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); dex::TypeIndex type_index(instruction.VRegB_3rc()); uint32_t register_index = instruction.VRegC_3rc(); - BuildFilledNewArray( - dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index); + HNewArray* new_array = BuildFilledNewArray(dex_pc, + type_index, + number_of_vreg_arguments, + /* is_range */ true, + /* args*/ nullptr, + register_index); + BuildConstructorFenceForAllocation(new_array); break; } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 7fdc1883ca..e968760d84 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -62,6 +62,7 @@ class HInstructionBuilder : public ValueObject { current_block_(nullptr), current_locals_(nullptr), latest_result_(nullptr), + current_this_parameter_(nullptr), compiler_driver_(driver), code_generator_(code_generator), dex_compilation_unit_(dex_compilation_unit), @@ -193,12 +194,12 @@ class HInstructionBuilder : public ValueObject { uint32_t register_index); // Builds a new array node and the instructions that fill it. - void BuildFilledNewArray(uint32_t dex_pc, - dex::TypeIndex type_index, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + HNewArray* BuildFilledNewArray(uint32_t dex_pc, + dex::TypeIndex type_index, + uint32_t number_of_vreg_arguments, + bool is_range, + uint32_t* args, + uint32_t register_index); void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc); @@ -287,7 +288,11 @@ class HInstructionBuilder : public ValueObject { REQUIRES_SHARED(Locks::mutator_lock_); // Build a HNewInstance instruction. - bool BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc); + HNewInstance* BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc); + + // Build a HConstructorFence for HNewInstance and HNewArray instructions. This ensures the + // happens-before ordering for default-initialization of the object referred to by new_instance. + void BuildConstructorFenceForAllocation(HInstruction* allocation); // Return whether the compiler can assume `cls` is initialized. bool IsInitialized(Handle<mirror::Class> cls) const @@ -325,6 +330,11 @@ class HInstructionBuilder : public ValueObject { HBasicBlock* current_block_; ArenaVector<HInstruction*>* current_locals_; HInstruction* latest_result_; + // Current "this" parameter. + // Valid only after InitializeParameters() finishes. + // * Null for static methods. + // * Non-null for instance methods. 
+ HParameterValue* current_this_parameter_; CompilerDriver* const compiler_driver_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2dcc12e294..d14716601c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -30,9 +30,11 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, + CompilerDriver* compiler_driver, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph), codegen_(codegen), + compiler_driver_(compiler_driver), stats_(stats) {} void Run(); @@ -119,6 +121,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); CodeGenerator* codegen_; + CompilerDriver* compiler_driver_; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -130,7 +133,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { }; void InstructionSimplifier::Run() { - InstructionSimplifierVisitor visitor(graph_, codegen_, stats_); + InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); visitor.Run(); } @@ -257,7 +260,8 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { if (shift_amount->IsConstant()) { int64_t cst = Int64FromConstant(shift_amount->AsConstant()); - if ((cst & implicit_mask) == 0) { + int64_t masked_cst = cst & implicit_mask; + if (masked_cst == 0) { // Replace code looking like // SHL dst, value, 0 // with @@ -266,6 +270,17 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); return; + } else if (masked_cst != cst) { + // Replace code looking like + // SHL dst, value, cst + // where cst exceeds maximum distance with the equivalent + // SHL dst, value, cst & implicit_mask + // (as defined by shift semantics). This ensures other + // optimizations do not need to special case for such situations. + DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt); + instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1); + RecordSimplification(); + return; } } @@ -1884,7 +1899,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed // arraycopy methods is a direct pointer to the boot image. - HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_); + HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_); } } } diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index f7329a4a1f..5e2045580b 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -24,6 +24,7 @@ namespace art { class CodeGenerator; +class CompilerDriver; /** * Implements optimizations specific to each instruction. 
@@ -37,12 +38,14 @@ class CodeGenerator; */ class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph, - CodeGenerator* codegen, - OptimizingCompilerStats* stats = nullptr, - const char* name = kInstructionSimplifierPassName) + InstructionSimplifier(HGraph* graph, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + OptimizingCompilerStats* stats = nullptr, + const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats), - codegen_(codegen) {} + codegen_(codegen), + compiler_driver_(compiler_driver) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; @@ -50,6 +53,7 @@ class InstructionSimplifier : public HOptimization { private: CodeGenerator* codegen_; + CompilerDriver* compiler_driver_; DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index f16e3727c8..311be1fb49 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -216,5 +216,18 @@ void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { } } +void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { + if (!instruction->IsStringCharAt() + && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { + RecordSimplification(); + } +} + +void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) { + if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { + RecordSimplification(); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index eec4e49792..8596f6ad40 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -75,6 +75,8 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; void VisitVecMul(HVecMul* instruction) OVERRIDE; + void VisitVecLoad(HVecLoad* instruction) OVERRIDE; + void VisitVecStore(HVecStore* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 7d1f146587..e5a8499ff4 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -16,6 +16,8 @@ #include "instruction_simplifier_shared.h" +#include "mirror/array-inl.h" + namespace art { namespace { @@ -247,6 +249,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, access->GetType() == Primitive::kPrimNot) { // For object arrays, the read barrier instrumentation requires // the original array pointer. + // TODO: This can be relaxed for Baker CC. return false; } @@ -345,4 +348,59 @@ bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { return false; } +bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { + if (index->IsConstant()) { + // If index is constant the whole address calculation often can be done by LDR/STR themselves. + // TODO: Treat the case with not-embedable constant. 
+ return false; + } + + HGraph* graph = access->GetBlock()->GetGraph(); + ArenaAllocator* arena = graph->GetArena(); + Primitive::Type packed_type = access->GetPackedType(); + uint32_t data_offset = mirror::Array::DataOffset( + Primitive::ComponentSize(packed_type)).Uint32Value(); + size_t component_shift = Primitive::ComponentSizeShift(packed_type); + + bool is_extracting_beneficial = false; + // It is beneficial to extract index intermediate address only if there are at least 2 users. + for (const HUseListNode<HInstruction*>& use : index->GetUses()) { + HInstruction* user = use.GetUser(); + if (user->IsVecMemoryOperation() && user != access) { + HVecMemoryOperation* another_access = user->AsVecMemoryOperation(); + Primitive::Type another_packed_type = another_access->GetPackedType(); + uint32_t another_data_offset = mirror::Array::DataOffset( + Primitive::ComponentSize(another_packed_type)).Uint32Value(); + size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type); + if (another_data_offset == data_offset && another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } else if (user->IsIntermediateAddressIndex()) { + HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex(); + uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue(); + size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue(); + if (another_data_offset == data_offset && another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } + } + + if (!is_extracting_beneficial) { + return false; + } + + // Proceed to extract the index + data_offset address computation. + HIntConstant* offset = graph->GetIntConstant(data_offset); + HIntConstant* shift = graph->GetIntConstant(component_shift); + HIntermediateAddressIndex* address = + new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc); + + access->GetBlock()->InsertInstructionBefore(address, access); + access->ReplaceInput(address, 1); + + return true; +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 2ea103a518..371619fa2e 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -59,6 +59,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, size_t data_offset); bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); +bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index); } // namespace art diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 6236bd87ab..b664d41013 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -25,7 +25,7 @@ #include "mirror/dex_cache-inl.h" #include "nodes.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils.h" namespace art { @@ -146,7 +146,7 @@ void IntrinsicsRecognizer::Run() { Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); if (!CheckInvokeType(intrinsic, invoke)) { LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " - << intrinsic << " for " + << static_cast<uint32_t>(intrinsic) << " for " << art_method->PrettyMethod() << invoke->DebugName(); } else { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 
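TryExtractVecArrayAccessAddress above factors the `data_offset + (index << component_shift)` part out of a vector array access so that several accesses with the same packed type can share one computation. A small model of the address arithmetic involved (illustrative only; the names and the flat address type are assumptions, not ART's object layout):

#include <cstddef>
#include <cstdint>

// Address of element `index` of an array whose payload begins `data_offset`
// bytes after the array reference, with (1 << component_shift)-byte elements.
uintptr_t ElementAddress(uintptr_t array_base, size_t data_offset,
                         size_t index, size_t component_shift) {
  return array_base + data_offset + (index << component_shift);
}

// Effect of the extraction when a vector load and a vector store agree on
// data_offset and component_shift: the index part is computed once and each
// access only adds its own base register to it.
struct VecAddresses { uintptr_t load_addr; uintptr_t store_addr; };

VecAddresses ShareIndexComputation(uintptr_t src_base, uintptr_t dst_base,
                                   size_t data_offset, size_t index,
                                   size_t component_shift) {
  uintptr_t intermediate = data_offset + (index << component_shift);  // shared
  return { src_base + intermediate, dst_base + intermediate };
}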
750f9cc213..ae5f8d1760 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -28,7 +28,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/arm/assembler_arm.h" namespace art { @@ -1010,17 +1010,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* code if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } @@ -1648,6 +1645,8 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } @@ -2599,11 +2598,7 @@ void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ eor(out, out, ShifterOperand(infinity)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2626,63 +2621,7 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { __ eor(out, out, ShifterOperand(infinity_high2)); // We don't care about the sign bit, so shift left. __ orr(out, IP, ShifterOperand(out, LSL, 1)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); -} - -void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - ArmAssembler* const assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. 
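The Float/Double.isInfinite changes above drop the clz/shift idiom in favor of GenerateConditionWithZero. Both forms compute the same "is the register zero?" predicate; a portable sketch (uses the GCC/Clang __builtin_clz builtin, purely illustrative):

#include <cstdint>

// Old idiom: a 32-bit value has 32 leading zeros only when it is 0, so
// clz(x) >> 5 yields 1 exactly for x == 0 and 0 otherwise.
uint32_t IsZeroViaClz(uint32_t x) {
  uint32_t leading_zeros = (x == 0) ? 32u : static_cast<uint32_t>(__builtin_clz(x));
  return leading_zeros >> 5;  // 1 iff x == 0
}

// New form: ask the code generator for the condition directly
// (GenerateConditionWithZero(kCondEQ, out, out)), i.e. the plain predicate:
uint32_t IsZeroDirect(uint32_t x) {
  return (x == 0) ? 1u : 0u;
}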
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)).AsRegister<Register>(); - - // Now get declaring class. - __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - __ ldr(IP, Address(temp, disable_flag_offset)); - __ ldr(temp, Address(temp, slow_path_flag_offset)); - __ orr(IP, IP, ShifterOperand(temp)); - __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); - - // Fast path. - __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) { @@ -2754,6 +2693,30 @@ void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderARM::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitThreadInterrupted(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + Register out = invoke->GetLocations()->Out().AsRegister<Register>(); + int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value(); + __ LoadFromOffset(kLoadWord, out, TR, offset); + Label done; + Label* const final_label = codegen_->GetFinalLabel(invoke, &done); + __ CompareAndBranchIfZero(out, final_label); + __ dmb(ISH); + __ LoadImmediate(IP, 0); + __ StoreToOffset(kStoreWord, IP, TR, offset); + __ dmb(ISH); + if (done.IsLinked()) { + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat) UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble) @@ -2767,6 +2730,7 @@ UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble) // Could be done by changing rou UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong) // High register pressure. 
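The Thread.interrupted() fast paths added here (ARM above, the other back ends further below) share one shape: load the per-thread flag, return straight away if it is clear, and only on the rare set path clear it under full ordering. A rough std::atomic model of that shape (the field name is a stand-in, not the runtime's real Thread layout):

#include <atomic>
#include <cstdint>

struct ThreadModel {
  std::atomic<int32_t> interrupted{0};  // stand-in for Thread::InterruptedOffset()
};

int32_t ThreadInterruptedFastPath(ThreadModel* self) {
  int32_t value = self->interrupted.load(std::memory_order_acquire);
  if (value == 0) {
    return 0;  // common case: branch straight to the exit label, no barriers
  }
  std::atomic_thread_fence(std::memory_order_seq_cst);  // dmb ish
  self->interrupted.store(0, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // dmb ish
  return value;  // non-zero: the interrupt status was observed and cleared
}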
UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4d360158a2..990a773a95 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,7 +28,7 @@ #include "mirror/reference.h" #include "mirror/string-inl.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/arm64/assembler_arm64.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -1154,17 +1154,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co Register temp = WRegisterFrom(locations->GetTemp(0)); // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, + /* field_offset */ offset_loc, temp, /* needs_null_check */ false, - /* use_load_acquire */ false, - /* always_update_field */ true); + /* use_load_acquire */ false); } } @@ -2900,69 +2897,6 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } -void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - MacroAssembler* masm = GetVIXLAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register obj = InputRegisterAt(invoke, 0); - Register out = OutputRegister(invoke); - - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0))); - - // Now get declaring class. - __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - // Load two adjacent flags in one 64-bit load. 
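The GenCas changes (ARM above, ARM64 in this hunk, VIXL below) route through UpdateReferenceFieldWithBakerReadBarrier because, under a concurrent copying collector, the field may still hold a from-space pointer while the caller's expected value names the to-space copy, so a raw CAS could fail even though both refer to the same object. A deliberately simplified model of that failure mode and fix-up (toy forwarding map, not the real Baker read barrier):

#include <atomic>
#include <cstdint>
#include <unordered_map>

using ObjRef = uintptr_t;

// Toy forwarding: maps a (possibly stale) reference to its to-space address.
ObjRef Forward(const std::unordered_map<ObjRef, ObjRef>& forwarding, ObjRef ref) {
  auto it = forwarding.find(ref);
  return it == forwarding.end() ? ref : it->second;
}

bool CasObjectField(std::atomic<ObjRef>* field,
                    ObjRef expected,   // assumed to be a to-space reference
                    ObjRef new_value,
                    const std::unordered_map<ObjRef, ObjRef>& forwarding) {
  // First make sure the stored reference is a to-space one, so that the value
  // comparison below really means "same object".
  ObjRef current = field->load(std::memory_order_relaxed);
  ObjRef to_space = Forward(forwarding, current);
  if (to_space != current) {
    field->compare_exchange_strong(current, to_space);
  }
  return field->compare_exchange_strong(expected, new_value);
}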
- __ Ldr(temp0, MemOperand(temp0, disable_flag_offset)); - } else { - UseScratchRegisterScope temps(masm); - Register temp1 = temps.AcquireW(); - __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset)); - __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset)); - __ Orr(temp0, temp1, temp0); - } - __ Cbnz(temp0, slow_path->GetEntryLabel()); - - { - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. - vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - // Fast path. - __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - } - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -3036,6 +2970,29 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + Register out = RegisterFrom(invoke->GetLocations()->Out(), Primitive::kPrimInt); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + + __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value()); + __ Ldar(out.W(), MemOperand(temp)); + + vixl::aarch64::Label done; + __ Cbz(out.W(), &done); + __ Stlr(wzr, MemOperand(temp)); + __ Bind(&done); +} + +UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 3c53517b28..ff59ce9658 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -24,7 +24,8 @@ namespace aarch64 { class MacroAssembler; -}} // namespace vixl::aarch64 +} // namespace aarch64 +} // namespace vixl namespace art { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index fd8a37ae05..0e04b9a950 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -26,7 +26,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "aarch32/constants-aarch32.h" @@ -1347,17 +1347,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. 
base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } @@ -2026,6 +2023,8 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } @@ -2972,11 +2971,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ Eor(out, out, infinity); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ Clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -3002,65 +2997,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { __ Eor(out, out, infinity_high2); // We don't care about the sign bit, so shift left. __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ Clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - ArmVIXLAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - vixl32::Register obj = InputRegisterAt(invoke, 0); - vixl32::Register out = OutputRegister(invoke); - - SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0))); - - // Now get declaring class. - __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. 
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - vixl32::Register temp1 = temps.Acquire(); - __ Ldr(temp1, MemOperand(temp0, disable_flag_offset)); - __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset)); - __ Orr(temp0, temp1, temp0); - __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel()); - - // Fast path. - __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - assembler->MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { @@ -3136,7 +3073,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { __ Add(out, in, -info.low); __ Cmp(out, info.high - info.low + 1); vixl32::Label allocate, done; - __ B(hs, &allocate); + __ B(hs, &allocate, /* is_far_target */ false); // If the value is within the bounds, load the j.l.Integer directly from the array. uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); @@ -3158,9 +3095,36 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out()); + int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value(); + __ Ldr(out, MemOperand(tr, offset)); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + vixl32::Label done; + vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ Dmb(vixl32::ISH); + __ Mov(temp, 0); + assembler->StoreToOffset(kStoreWord, temp, tr, offset); + __ Dmb(vixl32::ISH); + if (done.IsReferenced()) { + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 41df56b514..ea3e9e5ec9 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -23,6 +23,7 @@ #include "intrinsics.h" #include "mirror/array-inl.h" #include "mirror/string.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/mips/assembler_mips.h" #include "utils/mips/constants_mips.h" @@ -32,7 +33,7 @@ namespace art { namespace mips { IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen) - : arena_(codegen->GetGraph()->GetArena()) { + : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) { } MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() { @@ -1525,6 +1526,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2552,101 +2556,110 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { Register out = locations->Out().AsRegister<Register>(); MipsLabel done; - MipsLabel finite; - MipsLabel add; - // if (in.isNaN) { - // return 0; - // } - // - // out = floor.w.s(in); - // - // /* - // * This "if" statement is only needed for the pre-R6 version of floor.w.s - // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes - // * too large to fit in a 32-bit integer. - // * - // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative - // * numbers which are too large to be represented in a 32-bit signed - // * integer will be processed by floor.w.s to output Integer.MIN_VALUE, - // * and will no longer be processed by this "if" statement. - // */ - // if (out == Integer.MAX_VALUE) { - // TMP = (in < 0.0f) ? 1 : 0; - // /* - // * If TMP is 1, then adding it to out will wrap its value from - // * Integer.MAX_VALUE to Integer.MIN_VALUE. - // */ - // return out += TMP; - // } - // - // /* - // * For negative values not handled by the previous "if" statement the - // * test here will correctly set the value of TMP. - // */ - // TMP = ((in - out) >= 0.5f) ? 1 : 0; - // return out += TMP; - - // Test for NaN. if (IsR6()) { - __ CmpUnS(FTMP, in, in); + // out = floor(in); + // + // if (out != MAX_VALUE && out != MIN_VALUE) { + // TMP = ((in - out) >= 0.5) ? 1 : 0; + // return out += TMP; + // } + // return out; + + // out = floor(in); + __ FloorWS(FTMP, in); + __ Mfc1(out, FTMP); + + // if (out != MAX_VALUE && out != MIN_VALUE) + __ Addiu(TMP, out, 1); + __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001 + // or out - 0x7FFF FFFF. + // IOW, TMP = 1 if out = Int.MIN_VALUE + // or TMP = 0 if out = Int.MAX_VALUE. + __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE + // or out = Int.MAX_VALUE. + __ Beqz(TMP, &done); + + // TMP = (0.5f <= (in - out)) ? -1 : 0; + __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". 
+ __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); + __ SubS(FTMP, in, FTMP); + __ Mtc1(AT, half); + + __ CmpLeS(FTMP, half, FTMP); + __ Mfc1(TMP, FTMP); + + // Return out -= TMP. + __ Subu(out, out, TMP); } else { + // if (in.isNaN) { + // return 0; + // } + // + // out = floor.w.s(in); + // + // /* + // * This "if" statement is only needed for the pre-R6 version of floor.w.s + // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes + // * too large to fit in a 32-bit integer. + // */ + // if (out == Integer.MAX_VALUE) { + // TMP = (in < 0.0f) ? 1 : 0; + // /* + // * If TMP is 1, then adding it to out will wrap its value from + // * Integer.MAX_VALUE to Integer.MIN_VALUE. + // */ + // return out += TMP; + // } + // + // /* + // * For negative values not handled by the previous "if" statement the + // * test here will correctly set the value of TMP. + // */ + // TMP = ((in - out) >= 0.5f) ? 1 : 0; + // return out += TMP; + + MipsLabel finite; + MipsLabel add; + + // Test for NaN. __ CunS(in, in); - } - // Return zero for NaN. - __ Move(out, ZERO); - if (IsR6()) { - __ Bc1nez(FTMP, &done); - } else { + // Return zero for NaN. + __ Move(out, ZERO); __ Bc1t(&done); - } - // out = floor(in); - __ FloorWS(FTMP, in); - __ Mfc1(out, FTMP); + // out = floor(in); + __ FloorWS(FTMP, in); + __ Mfc1(out, FTMP); - if (!IsR6()) { __ LoadConst32(TMP, -1); - } - // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0; - __ LoadConst32(AT, std::numeric_limits<int32_t>::max()); - __ Bne(AT, out, &finite); + // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0; + __ LoadConst32(AT, std::numeric_limits<int32_t>::max()); + __ Bne(AT, out, &finite); - __ Mtc1(ZERO, FTMP); - if (IsR6()) { - __ CmpLtS(FTMP, in, FTMP); - __ Mfc1(TMP, FTMP); - } else { + __ Mtc1(ZERO, FTMP); __ ColtS(in, FTMP); - } - __ B(&add); + __ B(&add); - __ Bind(&finite); + __ Bind(&finite); - // TMP = (0.5f <= (in - out)) ? -1 : 0; - __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". - __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); - __ SubS(FTMP, in, FTMP); - __ Mtc1(AT, half); - if (IsR6()) { - __ CmpLeS(FTMP, half, FTMP); - __ Mfc1(TMP, FTMP); - } else { + // TMP = (0.5f <= (in - out)) ? -1 : 0; + __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". + __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); + __ SubS(FTMP, in, FTMP); + __ Mtc1(AT, half); __ ColeS(half, FTMP); - } - __ Bind(&add); + __ Bind(&add); - if (!IsR6()) { __ Movf(TMP, ZERO); - } - - // Return out -= TMP. - __ Subu(out, out, TMP); + // Return out -= TMP. 
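The rewritten R6 rounding sequence above is easier to follow in scalar form: take floor(in), leave the saturated INT_MIN/INT_MAX results untouched (which also covers NaN on R6), and otherwise add one when the fractional part is at least 0.5. A C++ sketch of the same algorithm, including the branch-free min/max test; the floor.w.s behavior is only approximated here:

#include <cmath>
#include <cstdint>
#include <limits>

// Approximation of R6 floor.w.s with FCSR.NAN2008=1: NaN converts to 0 and
// out-of-range inputs saturate to INT32_MIN / INT32_MAX.
int32_t FloorWS(float in) {
  if (std::isnan(in)) return 0;
  float f = std::floor(in);
  if (f <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();
  }
  if (f >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();
  }
  return static_cast<int32_t>(f);
}

int32_t RoundFloatSketch(float in) {
  int32_t out = FloorWS(in);
  // (out + 0x80000001) >> 1, computed unsigned, is zero exactly when out is
  // INT32_MAX (sum wraps to 0) or INT32_MIN (sum wraps to 1), i.e. the
  // Addiu/Aui/Srl/Beqz test above. Those saturated cases return unchanged.
  uint32_t t = (static_cast<uint32_t>(out) + 0x80000001u) >> 1;
  if (t != 0 && (in - static_cast<float>(out)) >= 0.5f) {
    ++out;  // fractional part >= 0.5: round up
  }
  return out;
}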
+ __ Subu(out, out, TMP); + } __ Bind(&done); } @@ -3133,6 +3146,89 @@ void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +// long java.lang.Integer.valueOf(long) +void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + calling_convention.GetReturnLocation(Primitive::kPrimNot), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + MipsAssembler* assembler = GetAssembler(); + InstructionCodeGeneratorMIPS* icodegen = + down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor()); + + Register out = locations->Out().AsRegister<Register>(); + InvokeRuntimeCallingConvention calling_convention; + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ LoadConst32(out, address); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst32(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + MipsLabel allocate, done; + int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; + + // Is (info.low <= in) && (in <= info.high)? + __ Addiu32(out, in, -info.low); + // As unsigned quantities is out < (info.high - info.low + 1)? + if (IsInt<16>(count)) { + __ Sltiu(AT, out, count); + } else { + __ LoadConst32(AT, count); + __ Sltu(AT, out, AT); + } + // Branch if out >= (info.high - info.low + 1). + // This means that "in" is outside of the range [info.low, info.high]. + __ Beqz(AT, &allocate); + + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ LoadConst32(TMP, data_offset + address); + __ ShiftAndAdd(out, out, TMP, TIMES_4); + __ Lw(out, out, 0); + __ MaybeUnpoisonHeapReference(out); + __ B(&done); + + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. 
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst32(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreToOffset(kStoreWord, in, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + // Unimplemented intrinsics. UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) @@ -3162,7 +3258,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf) +UNIMPLEMENTED_INTRINSIC(MIPS, ThreadInterrupted) UNREACHABLE_INTRINSICS(MIPS) diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index e134cb882e..eaadad2515 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -49,6 +49,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: + CodeGeneratorMIPS* codegen_; ArenaAllocator* arena_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index b57b41f686..2ecb1a3b02 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -23,6 +23,7 @@ #include "intrinsics.h" #include "mirror/array-inl.h" #include "mirror/string.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/mips64/assembler_mips64.h" #include "utils/mips64/constants_mips64.h" @@ -32,7 +33,7 @@ namespace art { namespace mips64 { IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) - : arena_(codegen->GetGraph()->GetArena()) { + : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) { } Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { @@ -890,54 +891,14 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble); Mips64Label done; - Mips64Label finite; - Mips64Label add; - // if (in.isNaN) { - // return 0; - // } - // // out = floor(in); // - // /* - // * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed. - // * - // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative - // * numbers which are too large to be represented in a 32-/64-bit - // * signed integer will be processed by floor.X.Y to output - // * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be - // * processed by this "if" statement. - // * - // * However, this bug in the 64-bit MIPS emulator causes the - // * behavior of floor.X.Y to be the same as pre-R6 implementations - // * of MIPS64. When that bug is fixed this logic should be amended. - // */ - // if (out == MAX_VALUE) { - // TMP = (in < 0.0) ? 1 : 0; - // /* - // * If TMP is 1, then adding it to out will wrap its value from - // * MAX_VALUE to MIN_VALUE. - // */ + // if (out != MAX_VALUE && out != MIN_VALUE) { + // TMP = ((in - out) >= 0.5) ? 1 : 0; // return out += TMP; // } - // - // /* - // * For negative values not handled by the previous "if" statement the - // * test here will correctly set the value of TMP. 
- // */ - // TMP = ((in - out) >= 0.5) ? 1 : 0; - // return out += TMP; - - // Test for NaN. - if (type == Primitive::kPrimDouble) { - __ CmpUnD(FTMP, in, in); - } else { - __ CmpUnS(FTMP, in, in); - } - - // Return zero for NaN. - __ Move(out, ZERO); - __ Bc1nez(FTMP, &done); + // return out; // out = floor(in); if (type == Primitive::kPrimDouble) { @@ -948,27 +909,26 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri __ Mfc1(out, FTMP); } - // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0; + // if (out != MAX_VALUE && out != MIN_VALUE) if (type == Primitive::kPrimDouble) { - __ LoadConst64(AT, std::numeric_limits<int64_t>::max()); + __ Daddiu(TMP, out, 1); + __ Dati(TMP, 0x8000); // TMP = out + 0x8000 0000 0000 0001 + // or out - 0x7FFF FFFF FFFF FFFF. + // IOW, TMP = 1 if out = Long.MIN_VALUE + // or TMP = 0 if out = Long.MAX_VALUE. + __ Dsrl(TMP, TMP, 1); // TMP = 0 if out = Long.MIN_VALUE + // or out = Long.MAX_VALUE. + __ Beqzc(TMP, &done); } else { - __ LoadConst32(AT, std::numeric_limits<int32_t>::max()); + __ Addiu(TMP, out, 1); + __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001 + // or out - 0x7FFF FFFF. + // IOW, TMP = 1 if out = Int.MIN_VALUE + // or TMP = 0 if out = Int.MAX_VALUE. + __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE + // or out = Int.MAX_VALUE. + __ Beqzc(TMP, &done); } - __ Bnec(AT, out, &finite); - - if (type == Primitive::kPrimDouble) { - __ Dmtc1(ZERO, FTMP); - __ CmpLtD(FTMP, in, FTMP); - __ Dmfc1(AT, FTMP); - } else { - __ Mtc1(ZERO, FTMP); - __ CmpLtS(FTMP, in, FTMP); - __ Mfc1(AT, FTMP); - } - - __ Bc(&add); - - __ Bind(&finite); // TMP = (0.5 <= (in - out)) ? -1 : 0; if (type == Primitive::kPrimDouble) { @@ -977,23 +937,21 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri __ SubD(FTMP, in, FTMP); __ Dmtc1(AT, half); __ CmpLeD(FTMP, half, FTMP); - __ Dmfc1(AT, FTMP); + __ Dmfc1(TMP, FTMP); } else { __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); __ SubS(FTMP, in, FTMP); __ Mtc1(AT, half); __ CmpLeS(FTMP, half, FTMP); - __ Mfc1(AT, FTMP); + __ Mfc1(TMP, FTMP); } - __ Bind(&add); - // Return out -= TMP. if (type == Primitive::kPrimDouble) { - __ Dsubu(out, out, AT); + __ Dsubu(out, out, TMP); } else { - __ Subu(out, out, AT); + __ Subu(out, out, TMP); } __ Bind(&done); @@ -1168,6 +1126,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2564,6 +2525,84 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickTanh); } +// long java.lang.Integer.valueOf(long) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + calling_convention.GetReturnLocation(Primitive::kPrimNot), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + InstructionCodeGeneratorMIPS64* icodegen = + down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + InvokeRuntimeCallingConvention calling_convention; + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ LoadConst64(out, address); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst64(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + Mips64Label allocate, done; + int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; + + // Is (info.low <= in) && (in <= info.high)? + __ Addiu32(out, in, -info.low); + // As unsigned quantities is out < (info.high - info.low + 1)? + __ LoadConst32(AT, count); + // Branch if out >= (info.high - info.low + 1). + // This means that "in" is outside of the range [info.low, info.high]. + __ Bgeuc(out, AT, &allocate); + + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ LoadConst64(TMP, data_offset + address); + __ Dlsa(out, out, TMP, TIMES_4); + __ Lwu(out, out, 0); + __ MaybeUnpoisonHeapReference(out); + __ Bc(&done); + + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. 
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst64(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreToOffset(kStoreWord, in, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) @@ -2583,7 +2622,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf) +UNIMPLEMENTED_INTRINSIC(MIPS64, ThreadInterrupted) UNREACHABLE_INTRINSICS(MIPS64) diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 5b95c26a21..179627ab20 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -49,6 +49,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: + CodeGeneratorMIPS64* codegen_; ArenaAllocator* arena_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 8e4574774f..a9da15d2ce 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -31,7 +31,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/x86/assembler_x86.h" #include "utils/x86/constants_x86.h" @@ -2819,65 +2819,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - LocationSummary* locations = invoke->GetLocations(); - X86Assembler* assembler = GetAssembler(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)); - DCHECK(temp_loc.Equals(locations->GetTemp(0))); - Register temp = temp_loc.AsRegister<Register>(); - - // Now get declaring class. 
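The Integer.valueOf(int) intrinsics added above for MIPS and MIPS64 both implement the IntegerCache protocol: inputs inside [info.low, info.high] are served from a preallocated boxed-Integer array in the boot image, everything else allocates a new object, and the two-sided bounds check is folded into a single unsigned comparison (the Addiu32/Sltu and Addiu32/Bgeuc sequences). A compact model of that logic (names are illustrative; the default libcore cache range is [-128, 127]):

#include <cstdint>

struct IntegerCacheModel {
  int32_t low;    // e.g. -128
  int32_t high;   // e.g. 127
  void** cache;   // boxed objects for every value in [low, high]
};

void* ValueOfSketch(const IntegerCacheModel& info, int32_t in,
                    void* (*allocate_boxed)(int32_t)) {
  uint32_t index = static_cast<uint32_t>(in) - static_cast<uint32_t>(info.low);
  uint32_t count = static_cast<uint32_t>(info.high - info.low + 1);
  if (index < count) {             // one unsigned compare covers both bounds
    return info.cache[index];      // fast path: load from the cache array
  }
  return allocate_boxed(in);       // slow path: allocate and initialize
}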
- __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags preventing us for using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - __ cmpw(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } else { - __ cmpb(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } - - // Fast path. - __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { return instruction->InputAt(input0) == instruction->InputAt(input1); } @@ -3407,7 +3348,29 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + Register out = invoke->GetLocations()->Out().AsRegister<Register>(); + Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value()); + NearLabel done; + __ fs()->movl(out, address); + __ testl(out, out); + __ j(kEqual, &done); + __ fs()->movl(address, Immediate(0)); + codegen_->MemoryFence(); + __ Bind(&done); +} + + UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 8ed2ad86bf..8100645e54 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -31,7 +31,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/constants_x86_64.h" @@ -759,7 +759,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, // We have to ensure that the native code doesn't clobber the XMM registers which are // non-volatile for ART, but volatile for Native calls. This will ensure that they are // saved in the prologue and properly restored. - for (auto fp_reg : non_volatile_xmm_regs) { + for (FloatRegister fp_reg : non_volatile_xmm_regs) { locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); } } @@ -898,7 +898,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, // We have to ensure that the native code doesn't clobber the XMM registers which are // non-volatile for ART, but volatile for Native calls. This will ensure that they are // saved in the prologue and properly restored. 
- for (auto fp_reg : non_volatile_xmm_regs) { + for (FloatRegister fp_reg : non_volatile_xmm_regs) { locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); } } @@ -2959,65 +2959,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - LocationSummary* locations = invoke->GetLocations(); - X86_64Assembler* assembler = GetAssembler(); - - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)); - DCHECK(temp_loc.Equals(locations->GetTemp(0))); - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - - // Now get declaring class. - __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags preventing us for using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - __ cmpw(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } else { - __ cmpb(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } - - // Fast path. 
- __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -3085,6 +3026,28 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); + Address address = Address::Absolute + (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true); + NearLabel done; + __ gs()->movl(out, address); + __ testl(out, out); + __ j(kEqual, &done); + __ gs()->movl(address, Immediate(0)); + codegen_->MemoryFence(); + __ Bind(&done); +} + +UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc new file mode 100644 index 0000000000..f2ee345c8c --- /dev/null +++ b/compiler/optimizing/load_store_analysis.cc @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "load_store_analysis.h" + +namespace art { + +// A cap for the number of heap locations to prevent pathological time/space consumption. +// The number of heap locations for most of the methods stays below this threshold. +constexpr size_t kMaxNumberOfHeapLocations = 32; + +void LoadStoreAnalysis::Run() { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + heap_location_collector_.VisitBasicBlock(block); + } + + if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { + // Bail out if there are too many heap locations to deal with. + heap_location_collector_.CleanUp(); + return; + } + if (!heap_location_collector_.HasHeapStores()) { + // Without heap stores, this pass would act mostly as GVN on heap accesses. + heap_location_collector_.CleanUp(); + return; + } + if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { + // Don't do load/store elimination if the method has volatile field accesses or + // monitor operations, for now. + // TODO: do it right. 
+ heap_location_collector_.CleanUp(); + return; + } + + heap_location_collector_.BuildAliasingMatrix(); +} + +} // namespace art diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h new file mode 100644 index 0000000000..4e940f30bf --- /dev/null +++ b/compiler/optimizing/load_store_analysis.h @@ -0,0 +1,518 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ + +#include "escape.h" +#include "nodes.h" +#include "optimization.h" + +namespace art { + +// A ReferenceInfo contains additional info about a reference such as +// whether it's a singleton, returned, etc. +class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { + public: + ReferenceInfo(HInstruction* reference, size_t pos) + : reference_(reference), + position_(pos), + is_singleton_(true), + is_singleton_and_not_returned_(true), + is_singleton_and_not_deopt_visible_(true), + has_index_aliasing_(false) { + CalculateEscape(reference_, + nullptr, + &is_singleton_, + &is_singleton_and_not_returned_, + &is_singleton_and_not_deopt_visible_); + } + + HInstruction* GetReference() const { + return reference_; + } + + size_t GetPosition() const { + return position_; + } + + // Returns true if reference_ is the only name that can refer to its value during + // the lifetime of the method. So it's guaranteed to not have any alias in + // the method (including its callees). + bool IsSingleton() const { + return is_singleton_; + } + + // Returns true if reference_ is a singleton and not returned to the caller or + // used as an environment local of an HDeoptimize instruction. + // The allocation and stores into reference_ may be eliminated for such cases. + bool IsSingletonAndRemovable() const { + return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_; + } + + // Returns true if reference_ is a singleton and returned to the caller or + // used as an environment local of an HDeoptimize instruction. + bool IsSingletonAndNonRemovable() const { + return is_singleton_ && + (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); + } + + bool HasIndexAliasing() { + return has_index_aliasing_; + } + + void SetHasIndexAliasing(bool has_index_aliasing) { + // Only allow setting to true. + DCHECK(has_index_aliasing); + has_index_aliasing_ = has_index_aliasing; + } + + private: + HInstruction* const reference_; + const size_t position_; // position in HeapLocationCollector's ref_info_array_. + + // Can only be referred to by a single name in the method. + bool is_singleton_; + // Is singleton and not returned to caller. + bool is_singleton_and_not_returned_; + // Is singleton and not used as an environment local of HDeoptimize. + bool is_singleton_and_not_deopt_visible_; + // Some heap locations with reference_ have array index aliasing, + // e.g. 
arr[i] and arr[j] may be the same location. + bool has_index_aliasing_; + + DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); +}; + +// A heap location is a reference-offset/index pair that a value can be loaded from +// or stored to. +class HeapLocation : public ArenaObject<kArenaAllocMisc> { + public: + static constexpr size_t kInvalidFieldOffset = -1; + + // TODO: more fine-grained array types. + static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; + + HeapLocation(ReferenceInfo* ref_info, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) + : ref_info_(ref_info), + offset_(offset), + index_(index), + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { + DCHECK(ref_info != nullptr); + DCHECK((offset == kInvalidFieldOffset && index != nullptr) || + (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. + value_killed_by_loop_side_effects_ = false; + } + } + + ReferenceInfo* GetReferenceInfo() const { return ref_info_; } + size_t GetOffset() const { return offset_; } + HInstruction* GetIndex() const { return index_; } + + // Returns the definition of declaring class' dex index. + // It's kDeclaringClassDefIndexForArrays for an array element. + int16_t GetDeclaringClassDefIndex() const { + return declaring_class_def_index_; + } + + bool IsArrayElement() const { + return index_ != nullptr; + } + + bool IsValueKilledByLoopSideEffects() const { + return value_killed_by_loop_side_effects_; + } + + void SetValueKilledByLoopSideEffects(bool val) { + value_killed_by_loop_side_effects_ = val; + } + + private: + ReferenceInfo* const ref_info_; // reference for instance/static field or array access. + const size_t offset_; // offset of static/instance field. + HInstruction* const index_; // index of an array element. + const int16_t declaring_class_def_index_; // declaring class's def's dex index. + bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop + // side effects because this location is stored + // into inside a loop. This gives + // better info on whether a singleton's location + // value may be killed by loop side effects. + + DISALLOW_COPY_AND_ASSIGN(HeapLocation); +}; + +// A HeapLocationCollector collects all relevant heap locations and keeps +// an aliasing matrix for all locations. +class HeapLocationCollector : public HGraphVisitor { + public: + static constexpr size_t kHeapLocationNotFound = -1; + // Start with a single uint32_t word. That's enough bits for pair-wise + // aliasing matrix of 8 heap locations. 
+ static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; + + explicit HeapLocationCollector(HGraph* graph) + : HGraphVisitor(graph), + ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), + heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), + aliasing_matrix_(graph->GetArena(), + kInitialAliasingMatrixBitVectorSize, + true, + kArenaAllocLSE), + has_heap_stores_(false), + has_volatile_(false), + has_monitor_operations_(false) {} + + void CleanUp() { + heap_locations_.clear(); + ref_info_array_.clear(); + } + + size_t GetNumberOfHeapLocations() const { + return heap_locations_.size(); + } + + HeapLocation* GetHeapLocation(size_t index) const { + return heap_locations_[index]; + } + + HInstruction* HuntForOriginalReference(HInstruction* ref) const { + DCHECK(ref != nullptr); + while (ref->IsNullCheck() || ref->IsBoundType()) { + ref = ref->InputAt(0); + } + return ref; + } + + ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const { + for (size_t i = 0; i < ref_info_array_.size(); i++) { + ReferenceInfo* ref_info = ref_info_array_[i]; + if (ref_info->GetReference() == ref) { + DCHECK_EQ(i, ref_info->GetPosition()); + return ref_info; + } + } + return nullptr; + } + + bool HasHeapStores() const { + return has_heap_stores_; + } + + bool HasVolatile() const { + return has_volatile_; + } + + bool HasMonitorOps() const { + return has_monitor_operations_; + } + + // Find and return the heap location index in heap_locations_. + size_t FindHeapLocationIndex(ReferenceInfo* ref_info, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) const { + for (size_t i = 0; i < heap_locations_.size(); i++) { + HeapLocation* loc = heap_locations_[i]; + if (loc->GetReferenceInfo() == ref_info && + loc->GetOffset() == offset && + loc->GetIndex() == index && + loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { + return i; + } + } + return kHeapLocationNotFound; + } + + // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. + bool MayAlias(size_t index1, size_t index2) const { + if (index1 < index2) { + return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2)); + } else if (index1 > index2) { + return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1)); + } else { + DCHECK(false) << "index1 and index2 are expected to be different"; + return true; + } + } + + void BuildAliasingMatrix() { + const size_t number_of_locations = heap_locations_.size(); + if (number_of_locations == 0) { + return; + } + size_t pos = 0; + // Compute aliasing info between every pair of different heap locations. + // Save the result in a matrix represented as a BitVector. + for (size_t i = 0; i < number_of_locations - 1; i++) { + for (size_t j = i + 1; j < number_of_locations; j++) { + if (ComputeMayAlias(i, j)) { + aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos)); + } + pos++; + } + } + } + + private: + // An allocation cannot alias with a name which already exists at the point + // of the allocation, such as a parameter or a load happening before the allocation. + bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { + if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) { + // Any reference that can alias with the allocation must appear after it in the block/in + // the block's successors. In reverse post order, those instructions will be visited after + // the allocation. 
+ return ref_info2->GetPosition() >= ref_info1->GetPosition(); + } + return true; + } + + bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { + if (ref_info1 == ref_info2) { + return true; + } else if (ref_info1->IsSingleton()) { + return false; + } else if (ref_info2->IsSingleton()) { + return false; + } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) || + !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) { + return false; + } + return true; + } + + // `index1` and `index2` are indices in the array of collected heap locations. + // Returns the position in the bit vector that tracks whether the two heap + // locations may alias. + size_t AliasingMatrixPosition(size_t index1, size_t index2) const { + DCHECK(index2 > index1); + const size_t number_of_locations = heap_locations_.size(); + // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1). + return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1)); + } + + // An additional position is passed in to make sure the calculated position is correct. + size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) { + size_t calculated_position = AliasingMatrixPosition(index1, index2); + DCHECK_EQ(calculated_position, position); + return calculated_position; + } + + // Compute if two locations may alias to each other. + bool ComputeMayAlias(size_t index1, size_t index2) const { + HeapLocation* loc1 = heap_locations_[index1]; + HeapLocation* loc2 = heap_locations_[index2]; + if (loc1->GetOffset() != loc2->GetOffset()) { + // Either two different instance fields, or one is an instance + // field and the other is an array element. + return false; + } + if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { + // Different types. + return false; + } + if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { + return false; + } + if (loc1->IsArrayElement() && loc2->IsArrayElement()) { + HInstruction* array_index1 = loc1->GetIndex(); + HInstruction* array_index2 = loc2->GetIndex(); + DCHECK(array_index1 != nullptr); + DCHECK(array_index2 != nullptr); + if (array_index1->IsIntConstant() && + array_index2->IsIntConstant() && + array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) { + // Different constant indices do not alias. 
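Editor's aside: the closed form in AliasingMatrixPosition() above linearizes the upper triangle of the pairwise matrix in exactly the order BuildAliasingMatrix() enumerates pairs. A throwaway check of that correspondence (not ART code); it also shows why 8 locations need only 8*7/2 = 28 bits, within the initial 32-bit vector:

#include <cassert>
#include <cstddef>

// Same closed form as AliasingMatrixPosition(): row i contributes (n - 1 - i) pairs.
size_t PositionSketch(size_t n, size_t i, size_t j) {
  return n * i - (1 + i) * i / 2 + (j - i - 1);
}

int main() {
  constexpr size_t n = 8;  // 8 heap locations -> 28 pairs
  size_t pos = 0;
  for (size_t i = 0; i + 1 < n; ++i) {
    for (size_t j = i + 1; j < n; ++j) {
      assert(PositionSketch(n, i, j) == pos);  // matches the sequential pos++ order
      ++pos;
    }
  }
  return 0;
}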
+ return false; + } + ReferenceInfo* ref_info = loc1->GetReferenceInfo(); + ref_info->SetHasIndexAliasing(true); + } + return true; + } + + ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { + ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); + if (ref_info == nullptr) { + size_t pos = ref_info_array_.size(); + ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); + ref_info_array_.push_back(ref_info); + } + return ref_info; + } + + void CreateReferenceInfoForReferenceType(HInstruction* instruction) { + if (instruction->GetType() != Primitive::kPrimNot) { + return; + } + DCHECK(FindReferenceInfoOf(instruction) == nullptr); + GetOrCreateReferenceInfo(instruction); + } + + HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) { + HInstruction* original_ref = HuntForOriginalReference(ref); + ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); + size_t heap_location_idx = FindHeapLocationIndex( + ref_info, offset, index, declaring_class_def_index); + if (heap_location_idx == kHeapLocationNotFound) { + HeapLocation* heap_loc = new (GetGraph()->GetArena()) + HeapLocation(ref_info, offset, index, declaring_class_def_index); + heap_locations_.push_back(heap_loc); + return heap_loc; + } + return heap_locations_[heap_location_idx]; + } + + HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { + if (field_info.IsVolatile()) { + has_volatile_ = true; + } + const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); + const size_t offset = field_info.GetFieldOffset().SizeValue(); + return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + } + + void VisitArrayAccess(HInstruction* array, HInstruction* index) { + GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, + index, HeapLocation::kDeclaringClassDefIndexForArrays); + } + + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + has_heap_stores_ = true; + if (location->GetReferenceInfo()->IsSingleton()) { + // A singleton's location value may be killed by loop side effects if it's + // defined before that loop, and it's stored into inside that loop. + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + HInstruction* ref = location->GetReferenceInfo()->GetReference(); + DCHECK(ref->IsNewInstance()); + if (loop_info->IsDefinedOutOfTheLoop(ref)) { + // ref's location value may be killed by this loop's side effects. + location->SetValueKilledByLoopSideEffects(true); + } else { + // ref is defined inside this loop so this loop's side effects cannot + // kill its location value at the loop header since ref/its location doesn't + // exist yet at the loop header. + } + } + } else { + // For non-singletons, value_killed_by_loop_side_effects_ is inited to + // true. 
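Editor's aside: VisitInstanceFieldSet() above marks a singleton's location as killed by loop side effects only when the reference is defined outside the loop that stores into it. A rough source-level illustration of that case (plain C++ standing in for the compiled code, names purely illustrative):

struct Point { int x = 0; };

int Sum(const int* data, int n) {
  Point* p = new Point();   // singleton defined before the loop
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    p->x = data[i];         // store inside the loop: at the loop header, p->x may
    sum += p->x;            // differ between iterations, so its value is "killed"
  }
  int result = sum + p->x;
  delete p;
  return result;
  // Had the allocation been inside the loop body, every iteration would see a fresh
  // object, so the loop's side effects could not kill its value at the loop header.
}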
+ DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true); + } + } + + void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + has_heap_stores_ = true; + } + + // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses + // since we cannot accurately track the fields. + + void VisitArrayGet(HArrayGet* instruction) OVERRIDE { + VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitArraySet(HArraySet* instruction) OVERRIDE { + VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + has_heap_stores_ = true; + } + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { + // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. + CreateReferenceInfoForReferenceType(new_instance); + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitParameterValue(HParameterValue* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitSelect(HSelect* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { + has_monitor_operations_ = true; + } + + ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. + ArenaVector<HeapLocation*> heap_locations_; // All heap locations. + ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. + bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better + // alias analysis and won't be as effective. + bool has_volatile_; // If there are volatile field accesses. + bool has_monitor_operations_; // If there are monitor operations. + + DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); +}; + +class LoadStoreAnalysis : public HOptimization { + public: + explicit LoadStoreAnalysis(HGraph* graph) + : HOptimization(graph, kLoadStoreAnalysisPassName), + heap_location_collector_(graph) {} + + const HeapLocationCollector& GetHeapLocationCollector() const { + return heap_location_collector_; + } + + void Run() OVERRIDE; + + static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis"; + + private: + HeapLocationCollector heap_location_collector_; + + DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc new file mode 100644 index 0000000000..24187777f6 --- /dev/null +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "load_store_analysis.h" +#include "nodes.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +class LoadStoreAnalysisTest : public CommonCompilerTest { + public: + LoadStoreAnalysisTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); + } + + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; +}; + +TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + + // entry: + // array ParameterValue + // index ParameterValue + // c1 IntConstant + // c2 IntConstant + // c3 IntConstant + // array_get1 ArrayGet [array, c1] + // array_get2 ArrayGet [array, c2] + // array_set1 ArraySet [array, c1, c3] + // array_set2 ArraySet [array, index, c3] + HInstruction* array = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* index = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0); + HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0); + HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0); + HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0); + entry->AddInstruction(array); + entry->AddInstruction(index); + entry->AddInstruction(array_get1); + entry->AddInstruction(array_get2); + entry->AddInstruction(array_set1); + entry->AddInstruction(array_set2); + + // Test HeapLocationCollector initialization. + // Should be no heap locations, no operations on the heap. + HeapLocationCollector heap_location_collector(graph_); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); + ASSERT_FALSE(heap_location_collector.HasHeapStores()); + + // Test that after visiting the graph_, it must see following heap locations + // array[c1], array[c2], array[index]; and it should see heap stores. + heap_location_collector.VisitBasicBlock(entry); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 3U); + ASSERT_TRUE(heap_location_collector.HasHeapStores()); + + // Test queries on HeapLocationCollector's ref info and index records. + ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array); + size_t field_off = HeapLocation::kInvalidFieldOffset; + size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; + size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def); + size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def); + size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def); + // must find this reference info for array in HeapLocationCollector. 
+ ASSERT_TRUE(ref != nullptr); + // must find these heap locations; + // and array[1], array[2], array[3] should be different heap locations. + ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc3 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc1 != loc2); + ASSERT_TRUE(loc2 != loc3); + ASSERT_TRUE(loc1 != loc3); + + // Test alias relationships after building aliasing matrix. + // array[1] and array[2] clearly should not alias; + // array[index] should alias with the others, because index is an unknow value. + heap_location_collector.BuildAliasingMatrix(); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); +} + +TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + + // entry: + // object ParameterValue + // c1 IntConstant + // set_field10 InstanceFieldSet [object, c1, 10] + // get_field10 InstanceFieldGet [object, 10] + // get_field20 InstanceFieldGet [object, 20] + + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + Primitive::kPrimNot); + HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object, + c1, + nullptr, + Primitive::kPrimInt, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object, + nullptr, + Primitive::kPrimInt, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object, + nullptr, + Primitive::kPrimInt, + MemberOffset(20), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + entry->AddInstruction(object); + entry->AddInstruction(set_field10); + entry->AddInstruction(get_field10); + entry->AddInstruction(get_field20); + + // Test HeapLocationCollector initialization. + // Should be no heap locations, no operations on the heap. + HeapLocationCollector heap_location_collector(graph_); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); + ASSERT_FALSE(heap_location_collector.HasHeapStores()); + + // Test that after visiting the graph, it must see following heap locations + // object.field10, object.field20 and it should see heap stores. + heap_location_collector.VisitBasicBlock(entry); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 2U); + ASSERT_TRUE(heap_location_collector.HasHeapStores()); + + // Test queries on HeapLocationCollector's ref info and index records. + ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object); + size_t loc1 = heap_location_collector.FindHeapLocationIndex( + ref, 10, nullptr, kUnknownClassDefIndex); + size_t loc2 = heap_location_collector.FindHeapLocationIndex( + ref, 20, nullptr, kUnknownClassDefIndex); + // must find references info for object and in HeapLocationCollector. + ASSERT_TRUE(ref != nullptr); + // must find these heap locations. + ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound); + // different fields of same object. 
+ ASSERT_TRUE(loc1 != loc2); + // accesses to different fields of the same object should not alias. + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); +} + +} // namespace art diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 48699b33ae..211528b4bd 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "load_store_analysis.h" #include "load_store_elimination.h" #include "escape.h" @@ -23,477 +24,6 @@ namespace art { -class ReferenceInfo; - -// A cap for the number of heap locations to prevent pathological time/space consumption. -// The number of heap locations for most of the methods stays below this threshold. -constexpr size_t kMaxNumberOfHeapLocations = 32; - -// A ReferenceInfo contains additional info about a reference such as -// whether it's a singleton, returned, etc. -class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { - public: - ReferenceInfo(HInstruction* reference, size_t pos) - : reference_(reference), - position_(pos), - is_singleton_(true), - is_singleton_and_not_returned_(true), - is_singleton_and_not_deopt_visible_(true), - has_index_aliasing_(false) { - CalculateEscape(reference_, - nullptr, - &is_singleton_, - &is_singleton_and_not_returned_, - &is_singleton_and_not_deopt_visible_); - } - - HInstruction* GetReference() const { - return reference_; - } - - size_t GetPosition() const { - return position_; - } - - // Returns true if reference_ is the only name that can refer to its value during - // the lifetime of the method. So it's guaranteed to not have any alias in - // the method (including its callees). - bool IsSingleton() const { - return is_singleton_; - } - - // Returns true if reference_ is a singleton and not returned to the caller or - // used as an environment local of an HDeoptimize instruction. - // The allocation and stores into reference_ may be eliminated for such cases. - bool IsSingletonAndRemovable() const { - return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_; - } - - // Returns true if reference_ is a singleton and returned to the caller or - // used as an environment local of an HDeoptimize instruction. - bool IsSingletonAndNonRemovable() const { - return is_singleton_ && - (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); - } - - bool HasIndexAliasing() { - return has_index_aliasing_; - } - - void SetHasIndexAliasing(bool has_index_aliasing) { - // Only allow setting to true. - DCHECK(has_index_aliasing); - has_index_aliasing_ = has_index_aliasing; - } - - private: - HInstruction* const reference_; - const size_t position_; // position in HeapLocationCollector's ref_info_array_. - - // Can only be referred to by a single name in the method. - bool is_singleton_; - // Is singleton and not returned to caller. - bool is_singleton_and_not_returned_; - // Is singleton and not used as an environment local of HDeoptimize. - bool is_singleton_and_not_deopt_visible_; - // Some heap locations with reference_ have array index aliasing, - // e.g. arr[i] and arr[j] may be the same location. - bool has_index_aliasing_; - - DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); -}; - -// A heap location is a reference-offset/index pair that a value can be loaded from -// or stored to. 
-class HeapLocation : public ArenaObject<kArenaAllocMisc> { - public: - static constexpr size_t kInvalidFieldOffset = -1; - - // TODO: more fine-grained array types. - static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; - - HeapLocation(ReferenceInfo* ref_info, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) - : ref_info_(ref_info), - offset_(offset), - index_(index), - declaring_class_def_index_(declaring_class_def_index), - value_killed_by_loop_side_effects_(true) { - DCHECK(ref_info != nullptr); - DCHECK((offset == kInvalidFieldOffset && index != nullptr) || - (offset != kInvalidFieldOffset && index == nullptr)); - if (ref_info->IsSingleton() && !IsArrayElement()) { - // Assume this location's value cannot be killed by loop side effects - // until proven otherwise. - value_killed_by_loop_side_effects_ = false; - } - } - - ReferenceInfo* GetReferenceInfo() const { return ref_info_; } - size_t GetOffset() const { return offset_; } - HInstruction* GetIndex() const { return index_; } - - // Returns the definition of declaring class' dex index. - // It's kDeclaringClassDefIndexForArrays for an array element. - int16_t GetDeclaringClassDefIndex() const { - return declaring_class_def_index_; - } - - bool IsArrayElement() const { - return index_ != nullptr; - } - - bool IsValueKilledByLoopSideEffects() const { - return value_killed_by_loop_side_effects_; - } - - void SetValueKilledByLoopSideEffects(bool val) { - value_killed_by_loop_side_effects_ = val; - } - - private: - ReferenceInfo* const ref_info_; // reference for instance/static field or array access. - const size_t offset_; // offset of static/instance field. - HInstruction* const index_; // index of an array element. - const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop - // side effects because this location is stored - // into inside a loop. This gives - // better info on whether a singleton's location - // value may be killed by loop side effects. - - DISALLOW_COPY_AND_ASSIGN(HeapLocation); -}; - -static HInstruction* HuntForOriginalReference(HInstruction* ref) { - DCHECK(ref != nullptr); - while (ref->IsNullCheck() || ref->IsBoundType()) { - ref = ref->InputAt(0); - } - return ref; -} - -// A HeapLocationCollector collects all relevant heap locations and keeps -// an aliasing matrix for all locations. -class HeapLocationCollector : public HGraphVisitor { - public: - static constexpr size_t kHeapLocationNotFound = -1; - // Start with a single uint32_t word. That's enough bits for pair-wise - // aliasing matrix of 8 heap locations. 
- static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; - - explicit HeapLocationCollector(HGraph* graph) - : HGraphVisitor(graph), - ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), - heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), - aliasing_matrix_(graph->GetArena(), - kInitialAliasingMatrixBitVectorSize, - true, - kArenaAllocLSE), - has_heap_stores_(false), - has_volatile_(false), - has_monitor_operations_(false) {} - - size_t GetNumberOfHeapLocations() const { - return heap_locations_.size(); - } - - HeapLocation* GetHeapLocation(size_t index) const { - return heap_locations_[index]; - } - - ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const { - for (size_t i = 0; i < ref_info_array_.size(); i++) { - ReferenceInfo* ref_info = ref_info_array_[i]; - if (ref_info->GetReference() == ref) { - DCHECK_EQ(i, ref_info->GetPosition()); - return ref_info; - } - } - return nullptr; - } - - bool HasHeapStores() const { - return has_heap_stores_; - } - - bool HasVolatile() const { - return has_volatile_; - } - - bool HasMonitorOps() const { - return has_monitor_operations_; - } - - // Find and return the heap location index in heap_locations_. - size_t FindHeapLocationIndex(ReferenceInfo* ref_info, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) const { - for (size_t i = 0; i < heap_locations_.size(); i++) { - HeapLocation* loc = heap_locations_[i]; - if (loc->GetReferenceInfo() == ref_info && - loc->GetOffset() == offset && - loc->GetIndex() == index && - loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { - return i; - } - } - return kHeapLocationNotFound; - } - - // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. - bool MayAlias(size_t index1, size_t index2) const { - if (index1 < index2) { - return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2)); - } else if (index1 > index2) { - return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1)); - } else { - DCHECK(false) << "index1 and index2 are expected to be different"; - return true; - } - } - - void BuildAliasingMatrix() { - const size_t number_of_locations = heap_locations_.size(); - if (number_of_locations == 0) { - return; - } - size_t pos = 0; - // Compute aliasing info between every pair of different heap locations. - // Save the result in a matrix represented as a BitVector. - for (size_t i = 0; i < number_of_locations - 1; i++) { - for (size_t j = i + 1; j < number_of_locations; j++) { - if (ComputeMayAlias(i, j)) { - aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos)); - } - pos++; - } - } - } - - private: - // An allocation cannot alias with a name which already exists at the point - // of the allocation, such as a parameter or a load happening before the allocation. - bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { - if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) { - // Any reference that can alias with the allocation must appear after it in the block/in - // the block's successors. In reverse post order, those instructions will be visited after - // the allocation. 
- return ref_info2->GetPosition() >= ref_info1->GetPosition(); - } - return true; - } - - bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { - if (ref_info1 == ref_info2) { - return true; - } else if (ref_info1->IsSingleton()) { - return false; - } else if (ref_info2->IsSingleton()) { - return false; - } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) || - !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) { - return false; - } - return true; - } - - // `index1` and `index2` are indices in the array of collected heap locations. - // Returns the position in the bit vector that tracks whether the two heap - // locations may alias. - size_t AliasingMatrixPosition(size_t index1, size_t index2) const { - DCHECK(index2 > index1); - const size_t number_of_locations = heap_locations_.size(); - // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1). - return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1)); - } - - // An additional position is passed in to make sure the calculated position is correct. - size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) { - size_t calculated_position = AliasingMatrixPosition(index1, index2); - DCHECK_EQ(calculated_position, position); - return calculated_position; - } - - // Compute if two locations may alias to each other. - bool ComputeMayAlias(size_t index1, size_t index2) const { - HeapLocation* loc1 = heap_locations_[index1]; - HeapLocation* loc2 = heap_locations_[index2]; - if (loc1->GetOffset() != loc2->GetOffset()) { - // Either two different instance fields, or one is an instance - // field and the other is an array element. - return false; - } - if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { - // Different types. - return false; - } - if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { - return false; - } - if (loc1->IsArrayElement() && loc2->IsArrayElement()) { - HInstruction* array_index1 = loc1->GetIndex(); - HInstruction* array_index2 = loc2->GetIndex(); - DCHECK(array_index1 != nullptr); - DCHECK(array_index2 != nullptr); - if (array_index1->IsIntConstant() && - array_index2->IsIntConstant() && - array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) { - // Different constant indices do not alias. 
- return false; - } - ReferenceInfo* ref_info = loc1->GetReferenceInfo(); - ref_info->SetHasIndexAliasing(true); - } - return true; - } - - ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { - ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); - if (ref_info == nullptr) { - size_t pos = ref_info_array_.size(); - ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); - ref_info_array_.push_back(ref_info); - } - return ref_info; - } - - void CreateReferenceInfoForReferenceType(HInstruction* instruction) { - if (instruction->GetType() != Primitive::kPrimNot) { - return; - } - DCHECK(FindReferenceInfoOf(instruction) == nullptr); - GetOrCreateReferenceInfo(instruction); - } - - HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) { - HInstruction* original_ref = HuntForOriginalReference(ref); - ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); - size_t heap_location_idx = FindHeapLocationIndex( - ref_info, offset, index, declaring_class_def_index); - if (heap_location_idx == kHeapLocationNotFound) { - HeapLocation* heap_loc = new (GetGraph()->GetArena()) - HeapLocation(ref_info, offset, index, declaring_class_def_index); - heap_locations_.push_back(heap_loc); - return heap_loc; - } - return heap_locations_[heap_location_idx]; - } - - HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { - if (field_info.IsVolatile()) { - has_volatile_ = true; - } - const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); - const size_t offset = field_info.GetFieldOffset().SizeValue(); - return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); - } - - void VisitArrayAccess(HInstruction* array, HInstruction* index) { - GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, - index, HeapLocation::kDeclaringClassDefIndexForArrays); - } - - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - has_heap_stores_ = true; - if (location->GetReferenceInfo()->IsSingleton()) { - // A singleton's location value may be killed by loop side effects if it's - // defined before that loop, and it's stored into inside that loop. - HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - HInstruction* ref = location->GetReferenceInfo()->GetReference(); - DCHECK(ref->IsNewInstance()); - if (loop_info->IsDefinedOutOfTheLoop(ref)) { - // ref's location value may be killed by this loop's side effects. - location->SetValueKilledByLoopSideEffects(true); - } else { - // ref is defined inside this loop so this loop's side effects cannot - // kill its location value at the loop header since ref/its location doesn't - // exist yet at the loop header. - } - } - } else { - // For non-singletons, value_killed_by_loop_side_effects_ is inited to - // true. 
- DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true); - } - } - - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - has_heap_stores_ = true; - } - - // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses - // since we cannot accurately track the fields. - - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitArraySet(HArraySet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); - has_heap_stores_ = true; - } - - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { - // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. - CreateReferenceInfoForReferenceType(new_instance); - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitParameterValue(HParameterValue* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitSelect(HSelect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { - has_monitor_operations_ = true; - } - - ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. - ArenaVector<HeapLocation*> heap_locations_; // All heap locations. - ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. - bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better - // alias analysis and won't be as effective. - bool has_volatile_; // If there are volatile field accesses. - bool has_monitor_operations_; // If there are monitor operations. - - DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); -}; - // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. // A heap location can be set to kUnknownHeapValue when: // - initially set a value. @@ -516,7 +46,7 @@ class LSEVisitor : public HGraphVisitor { side_effects_(side_effects), heap_values_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(heap_locations_collector. - GetNumberOfHeapLocations(), + GetNumberOfHeapLocations(), kUnknownHeapValue, graph->GetArena()->Adapter(kArenaAllocLSE)), graph->GetArena()->Adapter(kArenaAllocLSE)), @@ -566,14 +96,20 @@ class LSEVisitor : public HGraphVisitor { store->GetBlock()->RemoveInstruction(store); } - // Eliminate allocations that are not used. + // Eliminate singleton-classified instructions: + // * - Constructor fences (they never escape this thread). + // * - Allocations (if they are unused). 
for (HInstruction* new_instance : singleton_new_instances_) { + HConstructorFence::RemoveConstructorFences(new_instance); + if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); new_instance->GetBlock()->RemoveInstruction(new_instance); } } for (HInstruction* new_array : singleton_new_arrays_) { + HConstructorFence::RemoveConstructorFences(new_array); + if (!new_array->HasNonEnvironmentUses()) { new_array->RemoveEnvironmentUsers(); new_array->GetBlock()->RemoveInstruction(new_array); @@ -754,7 +290,7 @@ class LSEVisitor : public HGraphVisitor { size_t offset, HInstruction* index, int16_t declaring_class_def_index) { - HInstruction* original_ref = HuntForOriginalReference(ref); + HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( ref_info, offset, index, declaring_class_def_index); @@ -821,7 +357,7 @@ class LSEVisitor : public HGraphVisitor { HInstruction* index, int16_t declaring_class_def_index, HInstruction* value) { - HInstruction* original_ref = HuntForOriginalReference(ref); + HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( ref_info, offset, index, declaring_class_def_index); @@ -1121,25 +657,12 @@ void LoadStoreElimination::Run() { // Skip this optimization. return; } - HeapLocationCollector heap_location_collector(graph_); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - heap_location_collector.VisitBasicBlock(block); - } - if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { - // Bail out if there are too many heap locations to deal with. - return; - } - if (!heap_location_collector.HasHeapStores()) { - // Without heap stores, this pass would act mostly as GVN on heap accesses. + const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector(); + if (heap_location_collector.GetNumberOfHeapLocations() == 0) { + // No HeapLocation information from LSA, skip this optimization. return; } - if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) { - // Don't do load/store elimination if the method has volatile field accesses or - // monitor operations, for now. - // TODO: do it right. 
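Editor's aside: the Run() rework above no longer builds its own HeapLocationCollector; it consumes the one computed by the new LoadStoreAnalysis pass, and the constructor change in the load_store_elimination.h hunk below threads that dependency through. A sketch of how a caller would chain the passes (the setup names and the SideEffectsAnalysis constructor are assumptions; only the LoadStoreAnalysis/LoadStoreElimination shapes come from this change):

SideEffectsAnalysis side_effects(graph);
LoadStoreAnalysis lsa(graph);
side_effects.Run();
lsa.Run();                                           // collects heap locations and the aliasing matrix
LoadStoreElimination lse(graph, side_effects, lsa);
lse.Run();                                           // bails out if LSA recorded no heap locations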
- return; - } - heap_location_collector.BuildAliasingMatrix(); + LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { lse_visitor.VisitBasicBlock(block); diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 1d9e5c8da6..efe71c733a 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -22,12 +22,16 @@ namespace art { class SideEffectsAnalysis; +class LoadStoreAnalysis; class LoadStoreElimination : public HOptimization { public: - LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects) + LoadStoreElimination(HGraph* graph, + const SideEffectsAnalysis& side_effects, + const LoadStoreAnalysis& lsa) : HOptimization(graph, kLoadStoreEliminationPassName), - side_effects_(side_effects) {} + side_effects_(side_effects), + lsa_(lsa) {} void Run() OVERRIDE; @@ -35,6 +39,7 @@ class LoadStoreElimination : public HOptimization { private: const SideEffectsAnalysis& side_effects_; + const LoadStoreAnalysis& lsa_; DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination); }; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index d5e105951b..4334ab10bd 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -71,7 +71,7 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, // extension when represented in the *width* of the given narrower data type // (the fact that char normally zero extends does not matter here). int64_t value = 0; - if (IsInt64AndGet(instruction, &value)) { + if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: if (std::numeric_limits<int8_t>::min() <= value && @@ -119,7 +119,7 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, // extension when represented in the *width* of the given narrower data type // (the fact that byte/short normally sign extend does not matter here). int64_t value = 0; - if (IsInt64AndGet(instruction, &value)) { + if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: if (std::numeric_limits<uint8_t>::min() <= value && @@ -173,6 +173,84 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } +// Detect situations with same-extension narrower operands. +// Returns true on success and sets is_unsigned accordingly. +static bool IsNarrowerOperands(HInstruction* a, + HInstruction* b, + Primitive::Type type, + /*out*/ HInstruction** r, + /*out*/ HInstruction** s, + /*out*/ bool* is_unsigned) { + if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) { + *is_unsigned = false; + return true; + } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) { + *is_unsigned = true; + return true; + } + return false; +} + +// As above, single operand. +static bool IsNarrowerOperand(HInstruction* a, + Primitive::Type type, + /*out*/ HInstruction** r, + /*out*/ bool* is_unsigned) { + if (IsSignExtensionAndGet(a, type, r)) { + *is_unsigned = false; + return true; + } else if (IsZeroExtensionAndGet(a, type, r)) { + *is_unsigned = true; + return true; + } + return false; +} + +// Detect up to two instructions a and b, and an acccumulated constant c. 
+static bool IsAddConstHelper(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth) { + static constexpr int32_t kMaxDepth = 8; // don't search too deep + int64_t value = 0; + if (IsInt64AndGet(instruction, &value)) { + *c += value; + return true; + } else if (instruction->IsAdd() && depth <= kMaxDepth) { + return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && + IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); + } else if (*a == nullptr) { + *a = instruction; + return true; + } else if (*b == nullptr) { + *b = instruction; + return true; + } + return false; // too many non-const operands +} + +// Detect a + b + c for an optional constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (instruction->IsAdd()) { + // Try to find a + b and accumulated c. + if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && + IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && + *b != nullptr) { + return true; + } + // Found a + b. + *a = instruction->InputAt(0); + *b = instruction->InputAt(1); + *c = 0; + return true; + } + return false; +} + // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; @@ -733,7 +811,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: more operations and intrinsics, detect saturation arithmetic, etc. +// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -755,10 +833,9 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } else if (instruction->IsArrayGet()) { - // Strings are different, with a different offset to the actual data - // and some compressed to save memory. For now, all cases are rejected - // to avoid the complexity. - if (instruction->AsArrayGet()->IsStringCharAt()) { + // Deal with vector restrictions. + if (instruction->AsArrayGet()->IsStringCharAt() && + HasVectorRestrictions(restrictions, kNoStringCharAt)) { return false; } // Accept a right-hand-side array base[index] for @@ -850,30 +927,38 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + bool is_unsigned = false; if ((HasVectorRestrictions(restrictions, kNoShift)) || (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { return false; // unsupported instruction - } else if ((instruction->IsShr() || instruction->IsUShr()) && - HasVectorRestrictions(restrictions, kNoHiBits)) { - return false; // hibits may impact lobits; TODO: we can do better! + } else if (HasVectorRestrictions(restrictions, kNoHiBits)) { + // Shifts right need extra care to account for higher order bits. + // TODO: less likely shr/unsigned and ushr/signed can by flipping signess. 
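Editor's aside: the kNoHiBits handling above is about evaluating a wide shift in narrower SIMD lanes. An arithmetic >> only matches the wide result when the operand is the sign extension of the narrow value, and a logical >>> only when it is the zero extension. A small numeric check of the disagreement (not ART code; assumes the usual arithmetic >> on negative ints):

#include <cassert>
#include <cstdint>

int main() {
  int8_t narrow = -128;                 // byte pattern 0x80
  // Sign-extended wide value: arithmetic shift agrees with an 8-bit lane shift.
  int32_t wide_signed = narrow;         // 0xFFFFFF80
  assert(static_cast<int8_t>(wide_signed >> 1) == static_cast<int8_t>(narrow >> 1));  // both 0xC0
  // Zero-extended wide value: logical shift agrees with a logical 8-bit lane shift.
  uint32_t wide_unsigned = static_cast<uint8_t>(narrow);   // 0x00000080
  assert(static_cast<uint8_t>(wide_unsigned >> 1) ==
         static_cast<uint8_t>(static_cast<uint8_t>(narrow) >> 1));                    // both 0x40
  // Mixing them breaks: a logical shift of the sign-extended value leaves 0xC0 in the
  // low byte, not the 0x40 a zero-extension-based lane computation produces.
  assert(static_cast<uint8_t>(static_cast<uint32_t>(wide_signed) >> 1) == 0xC0);
  return 0;
}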
+ if (instruction->IsShr() && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless all operands are sign-extension narrower + } else if (instruction->IsUShr() && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) { + return false; // reject, unless all operands are zero-extension narrower + } } // Accept shift operator for vectorizable/invariant operands. // TODO: accept symbolic, albeit loop invariant shift factors. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - int64_t value = 0; - if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) { - // Make sure shift distance only looks at lower bits, as defined for sequential shifts. - int64_t mask = (instruction->GetType() == Primitive::kPrimLong) - ? kMaxLongShiftDistance - : kMaxIntShiftDistance; - int64_t distance = value & mask; + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + int64_t distance = 0; + if (VectorizeUse(node, r, generate_code, type, restrictions) && + IsInt64AndGet(opb, /*out*/ &distance)) { // Restrict shift distance to packed data type width. int64_t max_distance = Primitive::ComponentSize(type) * 8; if (0 <= distance && distance < max_distance) { if (generate_code) { - HInstruction* s = graph_->GetIntConstant(distance); - GenerateVecOp(instruction, vector_map_->Get(opa), s, type); + GenerateVecOp(instruction, vector_map_->Get(r), opb, type); } return true; } @@ -887,16 +972,59 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, case Intrinsics::kMathAbsFloat: case Intrinsics::kMathAbsDouble: { // Deal with vector restrictions. - if (HasVectorRestrictions(restrictions, kNoAbs) || - HasVectorRestrictions(restrictions, kNoHiBits)) { - // TODO: we can do better for some hibits cases. + HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower } // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type); + } + return true; + } + return false; + } + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: { + // Deal with vector restrictions. HInstruction* opa = instruction->InputAt(0); - if (VectorizeUse(node, opa, generate_code, type, restrictions)) { + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower + } + // Accept MIN/MAX(x, y) for vectorizable operands. 
+ DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { if (generate_code) { - GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } return true; } @@ -921,17 +1049,17 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoAbs; + *restrictions |= kNoDiv; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs; + *restrictions |= kNoDiv; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoMul; + *restrictions |= kNoDiv | kNoMul | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: return TrySetVectorLength(4); @@ -957,11 +1085,13 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs; + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(4); case Primitive::kPrimDouble: + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(2); default: break; @@ -969,9 +1099,36 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric } return false; case kMips: - case kMips64: // TODO: implement MIPS SIMD. return false; + case kMips64: + if (features->AsMips64InstructionSetFeatures()->HasMsa()) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoDiv | kNoMinMax; + return TrySetVectorLength(16); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv | kNoMinMax | kNoStringCharAt; + return TrySetVectorLength(8); + case Primitive::kPrimInt: + *restrictions |= kNoDiv | kNoMinMax; + return TrySetVectorLength(4); + case Primitive::kPrimLong: + *restrictions |= kNoDiv | kNoMinMax; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + *restrictions |= kNoMinMax; + return TrySetVectorLength(4); + case Primitive::kPrimDouble: + *restrictions |= kNoMinMax; + return TrySetVectorLength(2); + default: + break; + } // switch type + } + return false; default: return false; } // switch instruction set @@ -1058,13 +1215,14 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, void HLoopOptimization::GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, - Primitive::Type type) { + Primitive::Type type, + bool is_unsigned) { if (vector_mode_ == kSequential) { - // Scalar code follows implicit integral promotion. - if (type == Primitive::kPrimBoolean || - type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort) { + // Non-converting scalar code follows implicit integral promotion. 
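Editor's aside: the kNoMinMax restriction added above for floating point (annotated "-0.0 vs +0.0") exists because Java's Math.min/max distinguish signed zeros (and handle NaN specially), while a plain compare-and-select SIMD lane does not. A tiny demonstration of the mismatch (not ART code):

#include <cmath>
#include <cstdio>

// What a simple compare-and-select vector lane computes.
static double SelectMin(double a, double b) { return (a < b) ? a : b; }

int main() {
  double a = -0.0;
  double b = +0.0;
  // -0.0 < +0.0 is false, so the select returns +0.0 and the sign of zero is lost.
  std::printf("signbit(select) = %d\n", std::signbit(SelectMin(a, b)));  // prints 0
  // Java's Math.min(-0.0, +0.0) is specified to return -0.0 (signbit 1).
  return 0;
}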
+ if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean || + type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort)) { type = Primitive::kPrimInt; } } @@ -1141,6 +1299,22 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, DCHECK(opb == nullptr); vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_); break; + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: { + vector = new (global_allocator_) + HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + break; + } + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: { + vector = new (global_allocator_) + HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + break; + } default: LOG(FATAL) << "Unsupported SIMD intrinsic"; UNREACHABLE(); @@ -1150,9 +1324,10 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // corresponding new scalar instructions in the loop. The instruction will get an // environment while being inserted from the instruction map in original program order. DCHECK(vector_mode_ == kSequential); + size_t num_args = invoke->GetNumberOfArguments(); HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( global_allocator_, - invoke->GetNumberOfArguments(), + num_args, invoke->GetType(), invoke->GetDexPc(), invoke->GetDexMethodIndex(), @@ -1162,8 +1337,14 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, invoke->GetTargetMethod(), invoke->GetClinitCheckRequirement()); HInputsRef inputs = invoke->GetInputs(); - for (size_t index = 0; index < inputs.size(); ++index) { - new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index])); + size_t num_inputs = inputs.size(); + DCHECK_LE(num_args, num_inputs); + DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree + for (size_t index = 0; index < num_inputs; ++index) { + HInstruction* new_input = index < num_args + ? vector_map_->Get(inputs[index]) + : inputs[index]; // beyond arguments: just pass through + new_invoke->SetArgumentAt(index, new_input); } new_invoke->SetIntrinsic(invoke->GetIntrinsic(), kNeedsEnvironmentOrCache, @@ -1200,34 +1381,30 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, Primitive::Type type, uint64_t restrictions) { // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 - // (note whether the sign bit in higher precision is shifted in has no effect + // (note whether the sign bit in wider precision is shifted in has no effect // on the narrow precision computed by the idiom). - int64_t value = 0; + int64_t distance = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) { - // - // TODO: make following code less sensitive to associativity and commutativity differences. - // - HInstruction* x = instruction->InputAt(0); - // Test for an optional rounding part (x + 1) >> 1. - bool is_rounded = false; - if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) { - x = x->InputAt(0); - is_rounded = true; - } - // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed. 
- if (x->IsAdd()) { - HInstruction* a = x->InputAt(0); - HInstruction* b = x->InputAt(1); + IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) { + // Test for (a + b + c) >> 1 for optional constant c. + HInstruction* a = nullptr; + HInstruction* b = nullptr; + int64_t c = 0; + if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { + DCHECK(a != nullptr && b != nullptr); + // Accept c == 1 (rounded) or c == 0 (not rounded). + bool is_rounded = false; + if (c == 1) { + is_rounded = true; + } else if (c != 0) { + return false; + } + // Accept consistent zero or sign extension on operands a and b. HInstruction* r = nullptr; HInstruction* s = nullptr; bool is_unsigned = false; - if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) { - is_unsigned = true; - } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) { - is_unsigned = false; - } else { + if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) { return false; } // Deal with vector restrictions. @@ -1238,6 +1415,10 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = instruction->InputAt(0); + s = instruction->InputAt(1); + } if (VectorizeUse(node, r, generate_code, type, restrictions) && VectorizeUse(node, s, generate_code, type, restrictions)) { if (generate_code) { @@ -1251,12 +1432,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, is_unsigned, is_rounded)); } else { - VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions); - VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions); - GenerateVecOp(instruction, - vector_map_->Get(instruction->InputAt(0)), - vector_map_->Get(instruction->InputAt(1)), - type); + GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); } } return true; diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index c3b0b5d996..cc6343aeb5 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -71,6 +71,8 @@ class HLoopOptimization : public HOptimization { kNoSignedHAdd = 32, // no signed halving add kNoUnroundedHAdd = 64, // no unrounded halving add kNoAbs = 128, // no absolute value + kNoMinMax = 256, // no min/max + kNoStringCharAt = 512, // no StringCharAt }; /* @@ -136,7 +138,11 @@ class HLoopOptimization : public HOptimization { HInstruction* opa, HInstruction* opb, Primitive::Type type); - void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); + void GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type, + bool is_unsigned = false); // Vectorization idioms. 
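
As a concrete reference for the idiom recognizers declared below, here is a minimal scalar loop (sketch only, not from this patch) of the shape VectorizeHalvingAddIdiom accepts once rewritten to match (a + b + c) >> 1:

  #include <cstddef>
  #include <cstdint>

  // Sketch only: the constant 1 selects the rounded variant (c == 1); dropping it
  // gives the unrounded one (c == 0). Consistently sign-extended narrow operands
  // select the signed halving add, zero-extended ones the unsigned variant.
  void RoundedHalvingAdd(const int8_t* a, const int8_t* b, int8_t* out, size_t n) {
    for (size_t i = 0; i < n; ++i) {
      out[i] = static_cast<int8_t>((a[i] + b[i] + 1) >> 1);
    }
  }
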
bool VectorizeHalvingAddIdiom(LoopNode* node, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd05a884e9..9a91287670 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -528,6 +528,15 @@ HCurrentMethod* HGraph::GetCurrentMethod() { return cached_current_method_; } +const char* HGraph::GetMethodName() const { + const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_); + return dex_file_.GetMethodName(method_id); +} + +std::string HGraph::PrettyMethod(bool with_signature) const { + return dex_file_.PrettyMethod(method_idx_, with_signature); +} + HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) { switch (type) { case Primitive::Type::kPrimBoolean: @@ -1150,6 +1159,95 @@ void HVariableInputSizeInstruction::RemoveInputAt(size_t index) { } } +void HVariableInputSizeInstruction::RemoveAllInputs() { + RemoveAsUserOfAllInputs(); + DCHECK(!HasNonEnvironmentUses()); + + inputs_.clear(); + DCHECK_EQ(0u, InputCount()); +} + +void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { + DCHECK(instruction->GetBlock() != nullptr); + // Removing constructor fences only makes sense for instructions with an object return type. + DCHECK_EQ(Primitive::kPrimNot, instruction->GetType()); + + // Efficient implementation that simultaneously (in one pass): + // * Scans the uses list for all constructor fences. + // * Deletes that constructor fence from the uses list of `instruction`. + // * Deletes `instruction` from the constructor fence's inputs. + // * Deletes the constructor fence if it now has 0 inputs. + + const HUseList<HInstruction*>& uses = instruction->GetUses(); + // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt. + for (auto it = uses.begin(), end = uses.end(); it != end; ) { + const HUseListNode<HInstruction*>& use_node = *it; + HInstruction* const use_instruction = use_node.GetUser(); + + // Advance the iterator immediately once we fetch the use_node. + // Warning: If the input is removed, the current iterator becomes invalid. + ++it; + + if (use_instruction->IsConstructorFence()) { + HConstructorFence* ctor_fence = use_instruction->AsConstructorFence(); + size_t input_index = use_node.GetIndex(); + + // Process the candidate instruction for removal + // from the graph. + + // Constructor fence instructions are never + // used by other instructions. + // + // If we wanted to make this more generic, it + // could be a runtime if statement. + DCHECK(!ctor_fence->HasUses()); + + // A constructor fence's return type is "kPrimVoid" + // and therefore it can't have any environment uses. + DCHECK(!ctor_fence->HasEnvironmentUses()); + + // Remove the inputs first, otherwise removing the instruction + // will try to remove its uses while we are already removing uses + // and this operation will fail. + DCHECK_EQ(instruction, ctor_fence->InputAt(input_index)); + + // Removing the input will also remove the `use_node`. + // (Do not look at `use_node` after this, it will be a dangling reference). + ctor_fence->RemoveInputAt(input_index); + + // Once all inputs are removed, the fence is considered dead and + // is removed. + if (ctor_fence->InputCount() == 0u) { + ctor_fence->GetBlock()->RemoveInstruction(ctor_fence); + } + } + } + + if (kIsDebugBuild) { + // Post-condition checks: + // * None of the uses of `instruction` are a constructor fence. + // * The `instruction` itself did not get removed from a block. 
+ for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) { + CHECK(!use_node.GetUser()->IsConstructorFence()); + } + CHECK(instruction->GetBlock() != nullptr); + } +} + +HInstruction* HConstructorFence::GetAssociatedAllocation() { + HInstruction* new_instance_inst = GetPrevious(); + // Check if the immediately preceding instruction is a new-instance/new-array. + // Otherwise this fence is for protecting final fields. + if (new_instance_inst != nullptr && + (new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) { + // TODO: Need to update this code to handle multiple inputs. + DCHECK_EQ(InputCount(), 1u); + return new_instance_inst; + } else { + return nullptr; + } +} + #define DEFINE_ACCEPT(name, super) \ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ @@ -2538,15 +2636,17 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) { switch (rhs) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: - return os << "string_init"; + return os << "StringInit"; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - return os << "recursive"; + return os << "Recursive"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: + return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - return os << "direct"; + return os << "DirectAddress"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - return os << "dex_cache_pc_relative"; + return os << "DexCachePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: - return os << "dex_cache_via_method"; + return os << "DexCacheViaMethod"; default: LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -2590,7 +2690,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { void HLoadClass::SetLoadKind(LoadKind load_kind) { SetPackedField<LoadKindField>(load_kind); - if (load_kind != LoadKind::kDexCacheViaMethod && + if (load_kind != LoadKind::kRuntimeCall && load_kind != LoadKind::kReferrersClass) { RemoveAsUserOfInput(0u); SetRawInputAt(0u, nullptr); @@ -2606,8 +2706,6 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { switch (rhs) { case HLoadClass::LoadKind::kReferrersClass: return os << "ReferrersClass"; - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - return os << "BootImageLinkTimeAddress"; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; case HLoadClass::LoadKind::kBootImageAddress: @@ -2616,8 +2714,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: return os << "JitTableAddress"; - case HLoadClass::LoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HLoadClass::LoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -2645,10 +2743,10 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { void HLoadString::SetLoadKind(LoadKind load_kind) { // Once sharpened, the load kind should not be changed again. 
- DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod); + DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall); SetPackedField<LoadKindField>(load_kind); - if (load_kind != LoadKind::kDexCacheViaMethod) { + if (load_kind != LoadKind::kRuntimeCall) { RemoveAsUserOfInput(0u); SetRawInputAt(0u, nullptr); } @@ -2660,8 +2758,6 @@ void HLoadString::SetLoadKind(LoadKind load_kind) { std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { switch (rhs) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - return os << "BootImageLinkTimeAddress"; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; case HLoadString::LoadKind::kBootImageAddress: @@ -2670,8 +2766,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: return os << "JitTableAddress"; - case HLoadString::LoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HLoadString::LoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 872b9083fe..befd0ff97b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -47,6 +47,7 @@ namespace art { class GraphChecker; class HBasicBlock; +class HConstructorFence; class HCurrentMethod; class HDoubleConstant; class HEnvironment; @@ -58,6 +59,7 @@ class HIntConstant; class HInvoke; class HLongConstant; class HNullConstant; +class HParameterValue; class HPhi; class HSuspendCheck; class HTryBoundary; @@ -538,6 +540,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return method_idx_; } + // Get the method name (without the signature), e.g. "<init>" + const char* GetMethodName() const; + + // Get the pretty method name (class + name + optionally signature). + std::string PrettyMethod(bool with_signature = true) const; + InvokeType GetInvokeType() const { return invoke_type_; } @@ -1298,6 +1306,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ + M(ConstructorFence, Instruction) \ M(CurrentMethod, Instruction) \ M(ShouldDeoptimizeFlag, Instruction) \ M(Deoptimize, Instruction) \ @@ -1397,7 +1406,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(BitwiseNegatedRight, Instruction) \ M(DataProcWithShifterOp, Instruction) \ M(MultiplyAccumulate, Instruction) \ - M(IntermediateAddress, Instruction) + M(IntermediateAddress, Instruction) \ + M(IntermediateAddressIndex, Instruction) #endif #ifndef ART_ENABLE_CODEGEN_arm @@ -1477,8 +1487,11 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) template <typename T> class HUseListNode : public ArenaObject<kArenaAllocUseListNode> { public: + // Get the instruction which has this use as one of the inputs. T GetUser() const { return user_; } + // Get the position of the input record that this use corresponds to. size_t GetIndex() const { return index_; } + // Set the position of the input record that this use corresponds to. void SetIndex(size_t index) { index_ = index; } // Hook for the IntrusiveForwardList<>. 
@@ -1777,7 +1790,7 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { uint32_t dex_pc, HInstruction* holder) : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)), + locations_(arena->Adapter(kArenaAllocEnvironmentLocations)), parent_(nullptr), method_(method), dex_pc_(dex_pc), @@ -1791,6 +1804,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { to_copy.GetDexPc(), holder) {} + void AllocateLocations() { + DCHECK(locations_.empty()); + locations_.resize(vregs_.size()); + } + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { if (parent_ != nullptr) { parent_->SetAndCopyParentChain(allocator, parent); @@ -2038,7 +2056,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { !IsNativeDebugInfo() && !IsParameterValue() && // If we added an explicit barrier then we should keep it. - !IsMemoryBarrier(); + !IsMemoryBarrier() && + !IsConstructorFence(); } bool IsDeadAndRemovable() const { @@ -2432,6 +2451,11 @@ class HVariableInputSizeInstruction : public HInstruction { void InsertInputAt(size_t index, HInstruction* input); void RemoveInputAt(size_t index); + // Removes all the inputs. + // Also removes this instructions from each input's use list + // (for non-environment uses only). + void RemoveAllInputs(); + protected: HVariableInputSizeInstruction(SideEffects side_effects, uint32_t dex_pc, @@ -4134,6 +4158,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Use the method's own ArtMethod* loaded by the register allocator. kRecursive, + // Use PC-relative boot image ArtMethod* address that will be known at link time. + // Used for boot image methods referenced by boot image code. + kBootImageLinkTimePcRelative, + // Use ArtMethod* at a known address, embed the direct address in the code. // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, @@ -4273,6 +4301,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; } + bool HasPcRelativeMethodLoadKind() const { + return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; + } bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. if (NeedsCurrentMethodInput(GetMethodLoadKind())) { @@ -5063,7 +5095,7 @@ class HParameterValue FINAL : public HExpression<0> { const DexFile& GetDexFile() const { return dex_file_; } dex::TypeIndex GetTypeIndex() const { return type_index_; } uint8_t GetIndex() const { return index_; } - bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); } + bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); } bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } @@ -5371,10 +5403,16 @@ class HArrayGet FINAL : public HExpression<2> { } bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // TODO: We can be smarter here. - // Currently, the array access is always preceded by an ArrayLength or a NullCheck - // which generates the implicit null check. There are cases when these can be removed - // to produce better code. 
If we ever add optimizations to do so we should allow an - // implicit check here (as long as the address falls in the first page). + // Currently, unless the array is the result of NewArray, the array access is always + // preceded by some form of null NullCheck necessary for the bounds check, usually + // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for + // dynamic BCE. There are cases when these could be removed to produce better code. + // If we ever add optimizations to do so we should allow an implicit check here + // (as long as the address falls in the first page). + // + // As an example of such fancy optimization, we could eliminate BoundsCheck for + // a = cond ? new int[1] : null; + // a[0]; // The Phi does not need bounds check for either input. return false; } @@ -5639,12 +5677,8 @@ class HLoadClass FINAL : public HInstruction { // Use the Class* from the method's own ArtMethod*. kReferrersClass, - // Use boot image Class* address that will be known at link time. - // Used for boot image classes referenced by boot image code in non-PIC mode. - kBootImageLinkTimeAddress, - // Use PC-relative boot image Class* address that will be known at link time. - // Used for boot image classes referenced by boot image code in PIC mode. + // Used for boot image classes referenced by boot image code. kBootImageLinkTimePcRelative, // Use a known boot image Class* address, embedded in the code by the codegen. @@ -5658,12 +5692,11 @@ class HLoadClass FINAL : public HInstruction { // Load from the root table associated with the JIT compiled method. kJitTableAddress, - // Load from resolved types array accessed through the class loaded from - // the compiled method's own ArtMethod*. This is the default access type when - // all other types are unavailable. - kDexCacheViaMethod, + // Load using a simple runtime call. This is the fall-back load kind when + // the codegen is unable to use another appropriate kind. + kRuntimeCall, - kLast = kDexCacheViaMethod + kLast = kRuntimeCall }; HLoadClass(HCurrentMethod* current_method, @@ -5684,7 +5717,7 @@ class HLoadClass FINAL : public HInstruction { DCHECK(!is_referrers_class || !needs_access_check); SetPackedField<LoadKindField>( - is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); + is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kRuntimeCall); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); @@ -5718,7 +5751,7 @@ class HLoadClass FINAL : public HInstruction { bool CanCallRuntime() const { return NeedsAccessCheck() || MustGenerateClinitCheck() || - GetLoadKind() == LoadKind::kDexCacheViaMethod || + GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry; } @@ -5728,7 +5761,7 @@ class HLoadClass FINAL : public HInstruction { // If the class is in the boot image, the lookup in the runtime call cannot throw. // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and // PIC and subsequently avoids a DCE behavior dependency on the PIC option. 
- ((GetLoadKind() == LoadKind::kDexCacheViaMethod || + ((GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry) && !IsInBootImage()); } @@ -5747,7 +5780,7 @@ class HLoadClass FINAL : public HInstruction { const DexFile& GetDexFile() const { return dex_file_; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { - return GetLoadKind() == LoadKind::kDexCacheViaMethod; + return GetLoadKind() == LoadKind::kRuntimeCall; } static SideEffects SideEffectsForArchRuntimeCalls() { @@ -5796,15 +5829,14 @@ class HLoadClass FINAL : public HInstruction { static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || - load_kind == LoadKind::kBootImageLinkTimeAddress || load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBssEntry || - load_kind == LoadKind::kDexCacheViaMethod; + load_kind == LoadKind::kRuntimeCall; } void SetLoadKindInternal(LoadKind load_kind); - // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass. + // The special input is the HCurrentMethod for kRuntimeCall or kReferrersClass. // For other load kinds it's empty or possibly some architecture-specific instruction // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; @@ -5813,7 +5845,7 @@ class HLoadClass FINAL : public HInstruction { // - The compiling method's dex file if the class is defined there too. // - The compiling method's dex file if the class is referenced there. // - The dex file where the class is defined. When the load kind can only be - // kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`. + // kBssEntry or kRuntimeCall, we cannot emit code for this `HLoadClass`. const dex::TypeIndex type_index_; const DexFile& dex_file_; @@ -5830,7 +5862,6 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || GetLoadKind() == LoadKind::kBootImageAddress || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); @@ -5842,12 +5873,8 @@ class HLoadString FINAL : public HInstruction { public: // Determines how to load the String. enum class LoadKind { - // Use boot image String* address that will be known at link time. - // Used for boot image strings referenced by boot image code in non-PIC mode. - kBootImageLinkTimeAddress, - // Use PC-relative boot image String* address that will be known at link time. - // Used for boot image strings referenced by boot image code in PIC mode. + // Used for boot image strings referenced by boot image code. kBootImageLinkTimePcRelative, // Use a known boot image String* address, embedded in the code by the codegen. @@ -5861,12 +5888,11 @@ class HLoadString FINAL : public HInstruction { // Load from the root table associated with the JIT compiled method. kJitTableAddress, - // Load from resolved strings array accessed through the class loaded from - // the compiled method's own ArtMethod*. This is the default access type when - // all other types are unavailable. - kDexCacheViaMethod, + // Load using a simple runtime call. This is the fall-back load kind when + // the codegen is unable to use another appropriate kind. 
+ kRuntimeCall, - kLast = kDexCacheViaMethod, + kLast = kRuntimeCall, }; HLoadString(HCurrentMethod* current_method, @@ -5877,7 +5903,7 @@ class HLoadString FINAL : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { - SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod); + SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } void SetLoadKind(LoadKind load_kind); @@ -5912,8 +5938,7 @@ class HLoadString FINAL : public HInstruction { // the dex cache and the string is not guaranteed to be there yet. bool NeedsEnvironment() const OVERRIDE { LoadKind load_kind = GetLoadKind(); - if (load_kind == LoadKind::kBootImageLinkTimeAddress || - load_kind == LoadKind::kBootImageLinkTimePcRelative || + if (load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kJitTableAddress) { return false; @@ -5922,7 +5947,7 @@ class HLoadString FINAL : public HInstruction { } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { - return GetLoadKind() == LoadKind::kDexCacheViaMethod; + return GetLoadKind() == LoadKind::kRuntimeCall; } bool CanBeNull() const OVERRIDE { return false; } @@ -5956,7 +5981,7 @@ class HLoadString FINAL : public HInstruction { void SetLoadKindInternal(LoadKind load_kind); - // The special input is the HCurrentMethod for kDexCacheViaMethod. + // The special input is the HCurrentMethod for kRuntimeCall. // For other load kinds it's empty or possibly some architecture-specific instruction // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; @@ -5976,7 +6001,6 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBssEntry || - GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. @@ -6495,6 +6519,145 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier); }; +// A constructor fence orders all prior stores to fields that could be accessed via a final field of +// the specified object(s), with respect to any subsequent store that might "publish" +// (i.e. make visible) the specified object to another thread. +// +// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens +// for all final fields (that were set) at the end of the invoked constructor. +// +// The constructor fence models the freeze actions for the final fields of an object +// being constructed (semantically at the end of the constructor). Constructor fences +// have a per-object affinity; two separate objects being constructed get two separate +// constructor fences. +// +// (Note: that if calling a super-constructor or forwarding to another constructor, +// the freezes would happen at the end of *that* constructor being invoked). +// +// The memory model guarantees that when the object being constructed is "published" after +// constructor completion (i.e. escapes the current thread via a store), then any final field +// writes must be observable on other threads (once they observe that publication). 
+// +// Further, anything written before the freeze, and read by dereferencing through the final field, +// must also be visible (so final object field could itself have an object with non-final fields; +// yet the freeze must also extend to them). +// +// Constructor example: +// +// class HasFinal { +// final int field; Optimizing IR for <init>()V: +// HasFinal() { +// field = 123; HInstanceFieldSet(this, HasFinal.field, 123) +// // freeze(this.field); HConstructorFence(this) +// } HReturn +// } +// +// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of +// already-initialized classes; in that case the allocation must act as a "default-initializer" +// of the object which effectively writes the class pointer "final field". +// +// For example, we can model default-initialiation as roughly the equivalent of the following: +// +// class Object { +// private final Class header; +// } +// +// Java code: Optimizing IR: +// +// T new_instance<T>() { +// Object obj = allocate_memory(T.class.size); obj = HInvoke(art_quick_alloc_object, T) +// obj.header = T.class; // header write is done by above call. +// // freeze(obj.header) HConstructorFence(obj) +// return (T)obj; +// } +// +// See also: +// * CompilerDriver::RequiresConstructorBarrier +// * QuasiAtomic::ThreadFenceForConstructor +// +class HConstructorFence FINAL : public HVariableInputSizeInstruction { + // A fence has variable inputs because the inputs can be removed + // after prepare_for_register_allocation phase. + // (TODO: In the future a fence could freeze multiple objects + // after merging two fences together.) + public: + // `fence_object` is the reference that needs to be protected for correct publication. + // + // It makes sense in the following situations: + // * <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true). + // * new-instance-like instructions, it's the return value (i.e. HNewInstance). + // + // After construction the `fence_object` becomes the 0th input. + // This is not an input in a real sense, but just a convenient place to stash the information + // about the associated object. + HConstructorFence(HInstruction* fence_object, + uint32_t dex_pc, + ArenaAllocator* arena) + // We strongly suspect there is not a more accurate way to describe the fine-grained reordering + // constraints described in the class header. We claim that these SideEffects constraints + // enforce a superset of the real constraints. + // + // The ordering described above is conservatively modeled with SideEffects as follows: + // + // * To prevent reordering of the publication stores: + // ----> "Reads of objects" is the initial SideEffect. + // * For every primitive final field store in the constructor: + // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect. + // * If there are any stores to reference final fields in the constructor: + // ----> Use a more conservative "AllReads" SideEffect because any stores to any references + // that are reachable from `fence_object` also need to be prevented for reordering + // (and we do not want to do alias analysis to figure out what those stores are). + // + // In the implementation, this initially starts out as an "all reads" side effect; this is an + // even more conservative approach than the one described above, and prevents all of the + // above reordering without analyzing any of the instructions in the constructor. 
+ // + // If in a later phase we discover that there are no writes to reference final fields, + // we can refine the side effect to a smaller set of type reads (see above constraints). + : HVariableInputSizeInstruction(SideEffects::AllReads(), + dex_pc, + arena, + /* number_of_inputs */ 1, + kArenaAllocConstructorFenceInputs) { + DCHECK(fence_object != nullptr); + SetRawInputAt(0, fence_object); + } + + // The object associated with this constructor fence. + // + // (Note: This will be null after the prepare_for_register_allocation phase, + // as all constructor fence inputs are removed there). + HInstruction* GetFenceObject() const { + return InputAt(0); + } + + // Find all the HConstructorFence uses (`fence_use`) for `this` and: + // - Delete `fence_use` from `this`'s use list. + // - Delete `this` from `fence_use`'s inputs list. + // - If the `fence_use` is dead, remove it from the graph. + // + // A fence is considered dead once it no longer has any uses + // and all of the inputs are dead. + // + // This must *not* be called during/after prepare_for_register_allocation, + // because that removes all the inputs to the fences but the fence is actually + // still considered live. + static void RemoveConstructorFences(HInstruction* instruction); + + // Check if this constructor fence is protecting + // an HNewInstance or HNewArray that is also the immediate + // predecessor of `this`. + // + // Returns the associated HNewArray or HNewInstance, + // or null otherwise. + HInstruction* GetAssociatedAllocation(); + + DECLARE_INSTRUCTION(ConstructorFence); + + private: + DISALLOW_COPY_AND_ASSIGN(HConstructorFence); +}; + class HMonitorOperation FINAL : public HTemplateInstruction<1> { public: enum class OperationKind { diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index c6bfbcc7fb..075a816f3f 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -150,6 +150,49 @@ class HIntermediateAddress FINAL : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress); }; +// This instruction computes part of the array access offset (data and index offset). +// +// For array accesses the element address has the following structure: +// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. Taking into account LDR/STR addressing +// modes address part (CONST_OFFSET + index << ELEM_SHIFT) can be shared across array access with +// the same data type and index. For example, for the following loop 5 accesses can share address +// computation: +// +// void foo(int[] a, int[] b, int[] c) { +// for (i...) { +// a[i] = a[i] + 5; +// b[i] = b[i] + c[i]; +// } +// } +// +// Note: as the instruction doesn't involve base array address into computations it has no side +// effects (in comparison of HIntermediateAddress). 
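
A minimal standalone sketch (not ART code, helper names invented) of the factoring described above: the data-offset-plus-scaled-index term carries no array base, so it can be computed once and reused against each base, which is also why the instruction can be modelled without side effects.

  #include <cstddef>
  #include <cstdint>

  // Sketch only: the shared part of an element address for a[i], b[i], c[i].
  inline uintptr_t SharedOffset(size_t index, size_t data_offset, unsigned shift) {
    return data_offset + (static_cast<uintptr_t>(index) << shift);
  }

  // One add per array access once the shared part is available.
  inline uintptr_t ElementAddress(uintptr_t array_base, uintptr_t shared_offset) {
    return array_base + shared_offset;
  }
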
+class HIntermediateAddressIndex FINAL : public HExpression<3> { + public: + HIntermediateAddressIndex( + HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) + : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + SetRawInputAt(0, index); + SetRawInputAt(1, offset); + SetRawInputAt(2, shift); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + bool IsActualObject() const OVERRIDE { return false; } + + HInstruction* GetIndex() const { return InputAt(0); } + HInstruction* GetOffset() const { return InputAt(1); } + HInstruction* GetShift() const { return InputAt(2); } + + DECLARE_INSTRUCTION(IntermediateAddressIndex); + + private: + DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex); +}; + class HDataProcWithShifterOp FINAL : public HExpression<2> { public: enum OpKind { diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 52c247b52f..5dbe29b4fa 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -178,12 +178,17 @@ class HVecMemoryOperation : public HVecOperation { size_t vector_length, uint32_t dex_pc) : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc), - alignment_(Primitive::ComponentSize(packed_type), 0) { } + alignment_(Primitive::ComponentSize(packed_type), 0) { + DCHECK_GE(number_of_inputs, 2u); + } void SetAlignment(Alignment alignment) { alignment_ = alignment; } Alignment GetAlignment() const { return alignment_; } + HInstruction* GetArray() const { return InputAt(0); } + HInstruction* GetIndex() const { return InputAt(1); } + DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation); private: @@ -451,13 +456,24 @@ class HVecMin FINAL : public HVecBinaryOperation { HInstruction* right, Primitive::Type packed_type, size_t vector_length, + bool is_unsigned, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned); } + + bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); } + DECLARE_INSTRUCTION(VecMin); + private: + // Additional packed bits. + static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1; + static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecMin); }; @@ -470,13 +486,24 @@ class HVecMax FINAL : public HVecBinaryOperation { HInstruction* right, Primitive::Type packed_type, size_t vector_length, + bool is_unsigned, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned); } + + bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); } + DECLARE_INSTRUCTION(VecMax); + private: + // Additional packed bits. 
+ static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1; + static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecMax); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 065c11eddb..e5ab00bce3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -83,6 +83,7 @@ #include "jit/jit_code_cache.h" #include "jni/quick/jni_compiler.h" #include "licm.h" +#include "load_store_analysis.h" #include "load_store_elimination.h" #include "loop_optimization.h" #include "nodes.h" @@ -465,7 +466,8 @@ static HOptimization* BuildOptimization( const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles, SideEffectsAnalysis* most_recent_side_effects, - HInductionVarAnalysis* most_recent_induction) { + HInductionVarAnalysis* most_recent_induction, + LoadStoreAnalysis* most_recent_lsa) { std::string opt_name = ConvertPassNameToOptimizationName(pass_name); if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); @@ -499,15 +501,18 @@ static HOptimization* BuildOptimization( } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { return new (arena) HInductionVarAnalysis(graph); } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { - return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str()); + return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str()); } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { return new (arena) IntrinsicsRecognizer(graph, stats); } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { CHECK(most_recent_side_effects != nullptr); return new (arena) LICM(graph, *most_recent_side_effects, stats); + } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { + return new (arena) LoadStoreAnalysis(graph); } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) { CHECK(most_recent_side_effects != nullptr); - return new (arena) LoadStoreElimination(graph, *most_recent_side_effects); + CHECK(most_recent_lsa != nullptr); + return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa); } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { @@ -556,6 +561,7 @@ static ArenaVector<HOptimization*> BuildOptimizations( // in the pass name list. 
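
For pass lists built by name, the LoadStoreAnalysis tracking below implies an ordering requirement; a hypothetical list (sketch only, the string values are assumed from the corresponding k...PassName constants) that satisfies the CHECKs in BuildOptimization:

  #include <string>
  #include <vector>

  // Sketch only: load_store_analysis must precede load_store_elimination,
  // just as side_effects already had to, or the CHECK on most_recent_lsa
  // (respectively most_recent_side_effects) fails when building the LSE pass.
  const std::vector<std::string> kExamplePassList = {
      "side_effects",
      "load_store_analysis",
      "load_store_elimination",
  };
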
SideEffectsAnalysis* most_recent_side_effects = nullptr; HInductionVarAnalysis* most_recent_induction = nullptr; + LoadStoreAnalysis* most_recent_lsa = nullptr; ArenaVector<HOptimization*> ret(arena->Adapter()); for (const std::string& pass_name : pass_names) { HOptimization* opt = BuildOptimization( @@ -568,7 +574,8 @@ static ArenaVector<HOptimization*> BuildOptimizations( dex_compilation_unit, handles, most_recent_side_effects, - most_recent_induction); + most_recent_induction, + most_recent_lsa); CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\""; ret.push_back(opt); @@ -577,6 +584,8 @@ static ArenaVector<HOptimization*> BuildOptimizations( most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt); } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { most_recent_induction = down_cast<HInductionVarAnalysis*>(opt); + } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { + most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt); } } return ret; @@ -638,11 +647,14 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, new (arena) arm::InstructionSimplifierArm(graph, stats); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); + HInstructionScheduling* scheduling = + new (arena) HInstructionScheduling(graph, instruction_set, codegen); HOptimization* arm_optimizations[] = { simplifier, side_effects, gvn, - fixups + fixups, + scheduling, }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); break; @@ -760,7 +772,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination( graph, stats, "dead_code_elimination$final"); HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding"); - InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier( + graph, codegen, driver, stats); HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); HConstantFolding* fold2 = new (arena) HConstantFolding( graph, "constant_folding$after_inlining"); @@ -774,15 +787,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); - LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2); + LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph); + LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa); HSharpening* sharpening = new (arena) HSharpening( graph, codegen, dex_compilation_unit, driver, handles); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$after_inlining"); + graph, codegen, driver, stats, "instruction_simplifier$after_inlining"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$after_bce"); + graph, codegen, driver, stats, "instruction_simplifier$after_bce"); InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$before_codegen"); + graph, 
codegen, driver, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats); @@ -814,6 +828,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, side_effects2, + lsa, lse, cha_guard, dce3, diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index a0fdde169d..bce54bf49a 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -58,10 +58,22 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative pointer to a method, + // we need to add the base as the special input. + if (invoke->GetMethodLoadKind() == + HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative && + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { + InitializePCRelativeBasePointer(); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base_); + } + } + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); switch (load_kind) { - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kBssEntry: @@ -77,7 +89,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBssEntry: diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index a1c916f43a..2743df9dcf 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -205,13 +205,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // method pointer from the invoke. if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) { - DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind()); return; } bool base_added = false; if (invoke_static_or_direct != nullptr && - invoke_static_or_direct->HasPcRelativeDexCache() && + invoke_static_or_direct->HasPcRelativeMethodLoadKind() && !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke); // Add the extra parameter. 
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 66bfea9860..aa42fd647b 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -167,6 +167,45 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { } } +void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) { + // Trivially remove redundant HConstructorFence when it immediately follows an HNewInstance + // to an uninitialized class. In this special case, the art_quick_alloc_object_resolved + // will already have the 'dmb' which is strictly stronger than an HConstructorFence. + // + // The instruction builder always emits "x = HNewInstance; HConstructorFence(x)" so this + // is effectively pattern-matching that particular case and undoing the redundancy the builder + // had introduced. + // + // TODO: Move this to a separate pass. + HInstruction* allocation_inst = constructor_fence->GetAssociatedAllocation(); + if (allocation_inst != nullptr && allocation_inst->IsNewInstance()) { + HNewInstance* new_inst = allocation_inst->AsNewInstance(); + // This relies on the entrypoint already being set to the more optimized version; + // as that happens in this pass, this redundancy removal also cannot happen any earlier. + if (new_inst != nullptr && new_inst->GetEntrypoint() == kQuickAllocObjectResolved) { + // If this was done in an earlier pass, we would want to match that `previous` was an input + // to the `constructor_fence`. However, since this pass removes the inputs to the fence, + // we can ignore the inputs and just remove the instruction from its block. + DCHECK_EQ(1u, constructor_fence->InputCount()); + // TODO: GetAssociatedAllocation should not care about multiple inputs + // if we are in prepare_for_register_allocation pass only. + constructor_fence->GetBlock()->RemoveInstruction(constructor_fence); + return; + // TODO: actually remove the dmb from the .S entrypoints (initialized variants only). + } + + // HNewArray does not need this check because the art_quick_alloc_array does not itself + // have a dmb in any normal situation (i.e. the array class is never exactly in the + // "resolved" state). If the array class is not yet loaded, it will always go from + // Unloaded->Initialized state. + } + + // Remove all the inputs to the constructor fence; + // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a + // LocationSummary in the LocationsBuilder. 
+ constructor_fence->RemoveAllInputs(); +} + void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStaticWithExplicitClinitCheck()) { HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass(); diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 7ffbe44ef6..395d4ba2ee 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -43,6 +43,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitArraySet(HArraySet* instruction) OVERRIDE; void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; + void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 87f709f63d..2fd7b03151 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -20,7 +20,7 @@ #include "linear_order.h" #include "register_allocation_resolver.h" #include "ssa_liveness_analysis.h" -#include "thread-inl.h" +#include "thread-current-inl.h" namespace art { @@ -1968,8 +1968,7 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( allocator_->Adapter(kArenaAllocRegisterAllocator)); - for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) { - LiveInterval* parent_interval = *it; + for (LiveInterval* parent_interval : *intervals) { DCHECK(parent_interval->IsParent()); DCHECK(!parent_interval->HasSpillSlot()); size_t start = parent_interval->GetStart(); diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index d65d20cf43..320f01a727 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -23,6 +23,10 @@ #include "scheduler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_arm +#include "scheduler_arm.h" +#endif + namespace art { void SchedulingGraph::AddDependency(SchedulingNode* node, @@ -264,10 +268,11 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description, // Start the dot graph. Use an increasing index for easier differentiation. output << "digraph G {\n"; for (const auto& entry : nodes_map_) { - DumpAsDotNode(output, entry.second); + SchedulingNode* node = entry.second; + DumpAsDotNode(output, node); } // Create a fake 'end_of_scheduling' node to help visualization of critical_paths. - for (auto node : initial_candidates) { + for (SchedulingNode* node : initial_candidates) { const HInstruction* instruction = node->GetInstruction(); output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n " << "[label=\"" << node->GetLatency() << "\",dir=back]\n"; @@ -580,28 +585,39 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const { void HInstructionScheduling::Run(bool only_optimize_loop_blocks, bool schedule_randomly) { +#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm) + // Phase-local allocator that allocates scheduler internal data structures like + // scheduling nodes, internel nodes map, dependencies, etc. 
+ ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool()); + CriticalPathSchedulingNodeSelector critical_path_selector; + RandomSchedulingNodeSelector random_selector; + SchedulingNodeSelector* selector = schedule_randomly + ? static_cast<SchedulingNodeSelector*>(&random_selector) + : static_cast<SchedulingNodeSelector*>(&critical_path_selector); +#else // Avoid compilation error when compiling for unsupported instruction set. UNUSED(only_optimize_loop_blocks); UNUSED(schedule_randomly); +#endif switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: { - // Phase-local allocator that allocates scheduler internal data structures like - // scheduling nodes, internel nodes map, dependencies, etc. - ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool()); - - CriticalPathSchedulingNodeSelector critical_path_selector; - RandomSchedulingNodeSelector random_selector; - SchedulingNodeSelector* selector = schedule_randomly - ? static_cast<SchedulingNodeSelector*>(&random_selector) - : static_cast<SchedulingNodeSelector*>(&critical_path_selector); - arm64::HSchedulerARM64 scheduler(&arena_allocator, selector); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; } #endif +#if defined(ART_ENABLE_CODEGEN_arm) + case kThumb2: + case kArm: { + arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_); + arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor); + scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); + scheduler.Schedule(graph_); + break; + } +#endif default: break; } diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index 9236a0e4fa..73e8087cd0 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -23,6 +23,7 @@ #include "driver/compiler_driver.h" #include "nodes.h" #include "optimization.h" +#include "code_generator.h" namespace art { @@ -469,8 +470,9 @@ inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction class HInstructionScheduling : public HOptimization { public: - HInstructionScheduling(HGraph* graph, InstructionSet instruction_set) + HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr) : HOptimization(graph, kInstructionScheduling), + codegen_(cg), instruction_set_(instruction_set) {} void Run() { @@ -480,6 +482,7 @@ class HInstructionScheduling : public HOptimization { static constexpr const char* kInstructionScheduling = "scheduler"; + CodeGenerator* const codegen_; const InstructionSet instruction_set_; private: diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc new file mode 100644 index 0000000000..832a7e1571 --- /dev/null +++ b/compiler/optimizing/scheduler_arm.cc @@ -0,0 +1,827 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_utils.h" +#include "common_arm.h" +#include "mirror/array-inl.h" +#include "scheduler_arm.h" + +namespace art { +namespace arm { + +using helpers::Int32ConstantFrom; +using helpers::Uint64ConstantFrom; + +void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimLong: + // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs, + // so a bubble (kArmNopLatency) is added to represent the internal carry flag + // dependency inside these pairs. + last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmIntegerOpLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) { + HandleBinaryOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) { + HandleBinaryOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimLong: + last_visited_internal_latency_ = 3 * kArmMulIntegerLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_latency_ = kArmMulFloatingPointLatency; + break; + default: + last_visited_latency_ = kArmMulIntegerLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimLong: + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmIntegerOpLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) { + HandleBitwiseOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) { + HandleBitwiseOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) { + HandleBitwiseOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimInt: + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimLong: { + // HandleLongRotate + HInstruction* rhs = instr->GetRight(); + if (rhs->IsConstant()) { + uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance; + if (rot != 0u) { + last_visited_internal_latency_ = 3 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } + } else { + last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency; + last_visited_latency_ = kArmBranchLatency; + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << instr->GetResultType(); + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) { + Primitive::Type type = instr->GetResultType(); + HInstruction* rhs = instr->GetRight(); + switch (type) { + case Primitive::kPrimInt: + if 
(!rhs->IsConstant()) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimLong: + if (!rhs->IsConstant()) { + last_visited_internal_latency_ = 8 * kArmIntegerOpLatency; + } else { + uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance; + if (shift_value == 1 || shift_value >= 32) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + } + } + last_visited_latency_ = kArmIntegerOpLatency; + break; + default: + LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) { + HandleShiftLatencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) { + HandleShiftLatencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) { + HandleShiftLatencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) { + switch (instr->GetLeft()->GetType()) { + case Primitive::kPrimLong: + last_visited_internal_latency_ = 4 * kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency; + break; + default: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + break; + } + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) { + Primitive::Type type = instr->InputAt(0)->GetType(); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + break; + case Primitive::kPrimLong: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency; + break; + default: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + break; + } + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) { + if (instruction->GetResultType() == Primitive::kPrimInt) { + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) { + if (internal_latency) { + last_visited_internal_latency_ += kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmDataProcWithShifterOpLatency; + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) { + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + if (kind == HInstruction::kAdd) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else if (kind == HInstruction::kSub) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(); + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) { + DCHECK_EQ(instruction->GetType(), 
Primitive::kPrimLong); + DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); + + const uint32_t shift_value = instruction->GetShiftAmount(); + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + + if (shift_value >= 32) { + // Different shift types actually generate similar code here, + // no need to differentiate shift types like the codegen pass does, + // which also avoids handling shift types from different ARM backends. + HandleGenerateDataProc(instruction); + } else { + DCHECK_GT(shift_value, 1U); + DCHECK_LT(shift_value, 32U); + + if (kind == HInstruction::kOr || kind == HInstruction::kXor) { + HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(); + } else { + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; + HandleGenerateDataProc(instruction); + } + } +} + +void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) { + const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + + if (instruction->GetType() == Primitive::kPrimInt) { + DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); + HandleGenerateDataProcInstruction(); + } else { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + HandleGenerateDataProc(instruction); + } else { + HandleGenerateLongDataProc(instruction); + } + } +} + +void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) { + // Although the code generated is a simple `add` instruction, we found through empirical results + // that spacing it from its use in memory accesses was beneficial. + last_visited_internal_latency_ = kArmNopLatency; + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; +} + +void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArmMulIntegerLatency; +} + +void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { + Primitive::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + HInstruction* index = instruction->InputAt(1); + + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + if (maybe_compressed_char_at) { + last_visited_internal_latency_ += kArmMemoryLoadLatency; + } + if (index->IsConstant()) { + if (maybe_compressed_char_at) { + last_visited_internal_latency_ += + kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency; + last_visited_latency_ = kArmBranchLatency; + } else { + last_visited_latency_ += kArmMemoryLoadLatency; + } + } else { + if (has_intermediate_address) { + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + } + if (maybe_compressed_char_at) { + last_visited_internal_latency_ += + kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency; + last_visited_latency_ = kArmBranchLatency; + } else { + last_visited_latency_ += kArmMemoryLoadLatency; + } + } 
+ break; + } + + case Primitive::kPrimNot: { + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; + } else { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + if (has_intermediate_address) { + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + } + last_visited_internal_latency_ = kArmMemoryLoadLatency; + } + } + break; + } + + case Primitive::kPrimLong: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimFloat: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimDouble: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) { + last_visited_latency_ = kArmMemoryLoadLatency; + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + last_visited_internal_latency_ = kArmMemoryLoadLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { + HInstruction* index = instruction->InputAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryStoreLatency; + } else { + if (has_intermediate_address) { + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + } + + case Primitive::kPrimNot: { + if (instruction->InputAt(2)->IsNullConstant()) { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryStoreLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryStoreLatency; + } + } else { + // Following the exact instructions of runtime type checks is too complicated, + // just giving it a simple slow latency. 
+ last_visited_latency_ = kArmRuntimeTypeCheckLatency; + } + break; + } + + case Primitive::kPrimLong: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimFloat: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimDouble: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + default: + LOG(FATAL) << "Unreachable type " << value_type; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + // Users do not use any data results. + last_visited_latency_ = 0; +} + +void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) { + if (imm == 0) { + last_visited_internal_latency_ = 0; + last_visited_latency_ = 0; + } else if (imm == 1 || imm == -1) { + last_visited_latency_ = kArmIntegerOpLatency; + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + last_visited_internal_latency_ = 3 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) { + Primitive::Type type = instruction->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HInstruction* rhs = instruction->GetRight(); + if (rhs->IsConstant()) { + int32_t imm = Int32ConstantFrom(rhs->AsConstant()); + HandleDivRemConstantIntegralLatencies(imm); + } else { + last_visited_latency_ = kArmDivIntegerLatency; + } + break; + } + case Primitive::kPrimFloat: + last_visited_latency_ = kArmDivFloatLatency; + break; + case Primitive::kPrimDouble: + last_visited_latency_ = kArmDivDoubleLatency; + break; + default: + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; +} + +void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmLoadStringInternalLatency; + last_visited_latency_ = kArmMemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; +} + +void 
SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) { + if (instruction->IsStringAlloc()) { + last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency; + } else { + last_visited_internal_latency_ = kArmCallInternalLatency; + } + last_visited_latency_ = kArmCallLatency; +} + +void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) { + Primitive::Type type = instruction->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HInstruction* rhs = instruction->GetRight(); + if (rhs->IsConstant()) { + int32_t imm = Int32ConstantFrom(rhs->AsConstant()); + HandleDivRemConstantIntegralLatencies(imm); + } else { + last_visited_internal_latency_ = kArmDivIntegerLatency; + last_visited_latency_ = kArmMulIntegerLatency; + } + break; + } + default: + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + DCHECK(codegen_ != nullptr); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + last_visited_latency_ = kArmMemoryLoadLatency; + break; + + case Primitive::kPrimNot: + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + + case Primitive::kPrimLong: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + + case Primitive::kPrimFloat: + last_visited_latency_ = kArmMemoryLoadLatency; + break; + + case Primitive::kPrimDouble: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = + kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + + default: + last_visited_latency_ = kArmMemoryLoadLatency; + break; + } + + if (is_volatile) { + last_visited_internal_latency_ += kArmMemoryBarrierLatency; + } +} + +void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + DCHECK(codegen_ != nullptr); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + if (is_volatile) { + last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency; + last_visited_latency_ = kArmMemoryBarrierLatency; + } 
else { + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + + case Primitive::kPrimInt: + case Primitive::kPrimNot: + if (kPoisonHeapReferences && needs_write_barrier) { + last_visited_internal_latency_ += kArmIntegerOpLatency * 2; + } + last_visited_latency_ = kArmMemoryStoreLatency; + break; + + case Primitive::kPrimLong: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = + kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + + case Primitive::kPrimFloat: + last_visited_latency_ = kArmMemoryStoreLatency; + break; + + case Primitive::kPrimDouble: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = kArmIntegerOpLatency + + kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + + default: + last_visited_latency_ = kArmMemoryStoreLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + DCHECK((block->GetLoopInformation() != nullptr) || + (block->IsEntryBlock() && instruction->GetNext()->IsGoto())); + // Users do not use any data results. + last_visited_latency_ = 0; +} + +void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { + Primitive::Type result_type = instr->GetResultType(); + Primitive::Type input_type = instr->GetInputType(); + + switch (result_type) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX + break; + + case Primitive::kPrimInt: + switch (input_type) { + case Primitive::kPrimLong: + last_visited_latency_ = kArmIntegerOpLatency; // MOV + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmIntegerOpLatency; + break; + } + break; + + case Primitive::kPrimLong: + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + // MOV and extension + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // invokes runtime + last_visited_internal_latency_ = kArmCallInternalLatency; + break; + default: + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + } + break; + + case Primitive::kPrimFloat: + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + case 
Primitive::kPrimLong: + // invokes runtime + last_visited_internal_latency_ = kArmCallInternalLatency; + break; + case Primitive::kPrimDouble: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + } + break; + + case Primitive::kPrimDouble: + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + case Primitive::kPrimLong: + last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + case Primitive::kPrimFloat: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + } + break; + + default: + last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h new file mode 100644 index 0000000000..897e97da49 --- /dev/null +++ b/compiler/optimizing/scheduler_arm.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ +#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ + +#ifdef ART_USE_OLD_ARM_BACKEND +#include "code_generator_arm.h" +#else +#include "code_generator_arm_vixl.h" +#endif +#include "scheduler.h" + +namespace art { +namespace arm { +#ifdef ART_USE_OLD_ARM_BACKEND +typedef CodeGeneratorARM CodeGeneratorARMType; +#else +typedef CodeGeneratorARMVIXL CodeGeneratorARMType; +#endif + +// AArch32 instruction latencies. +// We currently assume that all ARM CPUs share the same instruction latency list. +// The following latencies were tuned based on performance experiments and +// automatic tuning using differential evolution approach on various benchmarks. 
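As an aside for readers new to this pass: the constants that follow are not consumed by the scheduler directly. The SchedulingLatencyVisitorARM handlers in scheduler_arm.cc turn them into two numbers per visited instruction, an internal latency (roughly, cycles spent on extra instructions the codegen will emit before the result exists, as in the long Add/Sub handling above) and a result latency (cycles a dependent instruction has to wait for the result). The following is a minimal standalone model of that accumulation, with invented instruction kinds and cycle counts rather than ART's types, intended only to illustrate the two-field split:

// Standalone sketch (not ART code): a latency visitor that splits an
// instruction's cost into an "internal" part and a result latency.
#include <cstdint>
#include <iostream>

class ToyLatencyVisitor {
 public:
  // Hypothetical handler for a 64-bit add lowered to ADDS + ADC on a
  // 32-bit core: the low-word add and the carry bubble are internal,
  // the high-word add defines when users may start.
  void VisitLongAdd() {
    internal_latency_ = 2 + 2;  // ADDS plus a bubble for the carry dependency
    result_latency_ = 2;        // ADC producing the final high word
  }

  uint32_t TotalNodeCost() const { return internal_latency_ + result_latency_; }

 private:
  uint32_t internal_latency_ = 0;
  uint32_t result_latency_ = 0;
};

int main() {
  ToyLatencyVisitor visitor;
  visitor.VisitLongAdd();
  std::cout << "long add costs " << visitor.TotalNodeCost() << " cycles\n";
  return 0;
}

The real visitors follow the same shape, but read their numbers from the tuned constants declared next.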
+static constexpr uint32_t kArmIntegerOpLatency = 2; +static constexpr uint32_t kArmFloatingPointOpLatency = 11; +static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; +static constexpr uint32_t kArmMulIntegerLatency = 6; +static constexpr uint32_t kArmMulFloatingPointLatency = 11; +static constexpr uint32_t kArmDivIntegerLatency = 10; +static constexpr uint32_t kArmDivFloatLatency = 20; +static constexpr uint32_t kArmDivDoubleLatency = 25; +static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; +static constexpr uint32_t kArmMemoryLoadLatency = 9; +static constexpr uint32_t kArmMemoryStoreLatency = 9; +static constexpr uint32_t kArmMemoryBarrierLatency = 6; +static constexpr uint32_t kArmBranchLatency = 4; +static constexpr uint32_t kArmCallLatency = 5; +static constexpr uint32_t kArmCallInternalLatency = 29; +static constexpr uint32_t kArmLoadStringInternalLatency = 10; +static constexpr uint32_t kArmNopLatency = 2; +static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; +static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; + +class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { + public: + explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) + : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} + + // Default visitor for instructions not handled specifically below. + void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArmIntegerOpLatency; + } + +// We add a second unused parameter to be able to use this macro like the others +// defined in `nodes.h`. +#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ + M(ArrayGet , unused) \ + M(ArrayLength , unused) \ + M(ArraySet , unused) \ + M(Add , unused) \ + M(Sub , unused) \ + M(And , unused) \ + M(Or , unused) \ + M(Ror , unused) \ + M(Xor , unused) \ + M(Shl , unused) \ + M(Shr , unused) \ + M(UShr , unused) \ + M(Mul , unused) \ + M(Div , unused) \ + M(Condition , unused) \ + M(Compare , unused) \ + M(BoundsCheck , unused) \ + M(InstanceFieldGet , unused) \ + M(InstanceFieldSet , unused) \ + M(InstanceOf , unused) \ + M(Invoke , unused) \ + M(LoadString , unused) \ + M(NewArray , unused) \ + M(NewInstance , unused) \ + M(Rem , unused) \ + M(StaticFieldGet , unused) \ + M(StaticFieldSet , unused) \ + M(SuspendCheck , unused) \ + M(TypeConversion , unused) + +#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ + M(BitwiseNegatedRight, unused) \ + M(MultiplyAccumulate, unused) \ + M(IntermediateAddress, unused) \ + M(IntermediateAddressIndex, unused) \ + M(DataProcWithShifterOp, unused) + +#define DECLARE_VISIT_INSTRUCTION(type, unused) \ + void Visit##type(H##type* instruction) OVERRIDE; + + FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + private: + void HandleBinaryOperationLantencies(HBinaryOperation* instr); + void HandleBitwiseOperationLantencies(HBinaryOperation* instr); + void HandleShiftLatencies(HBinaryOperation* instr); + void HandleDivRemConstantIntegralLatencies(int32_t imm); + void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); + void HandleGenerateDataProcInstruction(bool internal_latency = false); + void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); + void HandleGenerateLongDataProc(HDataProcWithShifterOp* 
instruction); + + // The latency setting for each HInstruction depends on how CodeGenerator may generate code, + // latency visitors may query CodeGenerator for such information for accurate latency settings. + CodeGeneratorARMType* codegen_; +}; + +class HSchedulerARM : public HScheduler { + public: + HSchedulerARM(ArenaAllocator* arena, + SchedulingNodeSelector* selector, + SchedulingLatencyVisitorARM* arm_latency_visitor) + : HScheduler(arena, arm_latency_visitor, selector) {} + ~HSchedulerARM() OVERRIDE {} + + bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { +#define CASE_INSTRUCTION_KIND(type, unused) case \ + HInstruction::InstructionKind::k##type: + switch (instruction->GetKind()) { + FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) + return true; + FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) + return true; + default: + return HScheduler::IsSchedulable(instruction); + } +#undef CASE_INSTRUCTION_KIND + } + + private: + DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 558dcc4cbc..83b487fb5b 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -16,6 +16,7 @@ #include "scheduler_arm64.h" #include "code_generator_utils.h" +#include "mirror/array-inl.h" namespace art { namespace arm64 { @@ -43,6 +44,13 @@ void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( last_visited_latency_ = kArm64IntegerOpLatency + 2; } +void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) { + // Although the code generated is a simple `add` instruction, we found through empirical results + // that spacing it from its use in memory accesses was beneficial. 
+ last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2; +} + void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArm64MulIntegerLatency; } @@ -192,5 +200,148 @@ void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) } } +void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *instr) { + if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + last_visited_latency_ = kArm64SIMDFloatingPointOpLatency; + } else { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( + HVecReplicateScalar* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDReplicateOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) { + if (instr->GetPackedType() == Primitive::kPrimBoolean) { + last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency; + } + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) { + if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + last_visited_latency_ = kArm64SIMDMulFloatingPointLatency; + } else { + last_visited_latency_ = kArm64SIMDMulIntegerLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) { + if (instr->GetPackedType() == Primitive::kPrimFloat) { + last_visited_latency_ = kArm64SIMDDivFloatLatency; + } else { + DCHECK(instr->GetPackedType() == Primitive::kPrimDouble); + last_visited_latency_ = kArm64SIMDDivDoubleLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void 
SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( + HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDMulIntegerLatency; +} + +void SchedulingLatencyVisitorARM64::HandleVecAddress( + HVecMemoryOperation* instruction, + size_t size ATTRIBUTE_UNUSED) { + HInstruction* index = instruction->InputAt(1); + if (!index->IsConstant()) { + last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) { + last_visited_internal_latency_ = 0; + size_t size = Primitive::ComponentSize(instr->GetPackedType()); + + if (instr->GetPackedType() == Primitive::kPrimChar + && mirror::kUseStringCompression + && instr->IsStringCharAt()) { + // Set latencies for the uncompressed case. + last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency; + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryLoadLatency; + } else { + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryLoadLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) { + last_visited_internal_latency_ = 0; + size_t size = Primitive::ComponentSize(instr->GetPackedType()); + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryStoreLatency; +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 7a33720655..63d5b7d6b6 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -42,6 +42,18 @@ static constexpr uint32_t kArm64LoadStringInternalLatency = 7; static constexpr uint32_t kArm64MulFloatingPointLatency = 6; static constexpr uint32_t kArm64MulIntegerLatency = 6; static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5; +static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency; + +static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10; +static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6; +static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10; +static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6; +static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12; +static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12; +static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16; +static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60; +static constexpr uint32_t kArm64SIMDDivFloatLatency = 30; +static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { public: @@ -52,29 +64,54 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { // We add a second unused parameter to be able to use this macro like the others // defined in `nodes.h`. 
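The instruction lists being extended below (and the ARM-specific ones introduced in scheduler_arm.h above) are plain X-macros: each FOR_EACH_* list applies a caller-supplied macro M once per entry, and DECLARE_VISIT_INSTRUCTION expands every entry into a Visit##type override, so adding a vector instruction to the list is all that is needed to get its declaration. A stripped-down, self-contained illustration of the expansion, using placeholder instruction types rather than ART's:

// Standalone X-macro sketch: one list of instruction names expanded into
// one Visit method per entry.
#include <iostream>

struct HAdd {};     // stand-ins for real HInstruction subclasses
struct HVecMul {};

#define FOR_EACH_TOY_INSTRUCTION(M) \
  M(Add, unused)                    \
  M(VecMul, unused)

class ToyVisitor {
 public:
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type*) { std::cout << "visiting " #type "\n"; }
  FOR_EACH_TOY_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
};

int main() {
  ToyVisitor visitor;
  HAdd add;
  HVecMul mul;
  visitor.VisitAdd(&add);
  visitor.VisitVecMul(&mul);
  return 0;
}

The same list is reused with a different M in HSchedulerARM64::IsSchedulable, which is why the header hunk further down only needs to append FOR_EACH_SCHEDULED_COMMON_INSTRUCTION to the existing CASE_INSTRUCTION_KIND switch.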
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ - M(ArrayGet , unused) \ - M(ArrayLength , unused) \ - M(ArraySet , unused) \ - M(BinaryOperation , unused) \ - M(BoundsCheck , unused) \ - M(Div , unused) \ - M(InstanceFieldGet , unused) \ - M(InstanceOf , unused) \ - M(Invoke , unused) \ - M(LoadString , unused) \ - M(Mul , unused) \ - M(NewArray , unused) \ - M(NewInstance , unused) \ - M(Rem , unused) \ - M(StaticFieldGet , unused) \ - M(SuspendCheck , unused) \ - M(TypeConversion , unused) +#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ + M(ArrayGet , unused) \ + M(ArrayLength , unused) \ + M(ArraySet , unused) \ + M(BinaryOperation , unused) \ + M(BoundsCheck , unused) \ + M(Div , unused) \ + M(InstanceFieldGet , unused) \ + M(InstanceOf , unused) \ + M(Invoke , unused) \ + M(LoadString , unused) \ + M(Mul , unused) \ + M(NewArray , unused) \ + M(NewInstance , unused) \ + M(Rem , unused) \ + M(StaticFieldGet , unused) \ + M(SuspendCheck , unused) \ + M(TypeConversion , unused) \ + M(VecReplicateScalar , unused) \ + M(VecSetScalars , unused) \ + M(VecSumReduce , unused) \ + M(VecCnv , unused) \ + M(VecNeg , unused) \ + M(VecAbs , unused) \ + M(VecNot , unused) \ + M(VecAdd , unused) \ + M(VecHalvingAdd , unused) \ + M(VecSub , unused) \ + M(VecMul , unused) \ + M(VecDiv , unused) \ + M(VecMin , unused) \ + M(VecMax , unused) \ + M(VecAnd , unused) \ + M(VecAndNot , unused) \ + M(VecOr , unused) \ + M(VecXor , unused) \ + M(VecShl , unused) \ + M(VecShr , unused) \ + M(VecUShr , unused) \ + M(VecMultiplyAccumulate, unused) \ + M(VecLoad , unused) \ + M(VecStore , unused) #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ M(IntermediateAddress, unused) \ + M(IntermediateAddressIndex, unused) \ M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ @@ -85,6 +122,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + + private: + void HandleSimpleArithmeticSIMD(HVecOperation *instr); + void HandleVecAddress(HVecMemoryOperation* instruction, size_t size); }; class HSchedulerARM64 : public HScheduler { @@ -101,6 +142,8 @@ class HSchedulerARM64 : public HScheduler { return true; FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND) return true; + FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND) + return true; default: return HScheduler::IsSchedulable(instruction); } diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 31d13e2a26..d87600aa5e 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -28,6 +28,10 @@ #include "scheduler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_arm +#include "scheduler_arm.h" +#endif + namespace art { // Return all combinations of ISA and code generator that are executable on @@ -56,7 +60,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { #endif }; - for (auto test_config : test_config_candidates) { + for (const CodegenTargetConfig& test_config : test_config_candidates) { if (CanExecute(test_config.GetInstructionSet())) { v.push_back(test_config); } @@ -65,133 +69,151 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class SchedulerTest : public CommonCompilerTest {}; - -#ifdef ART_ENABLE_CODEGEN_arm64 -TEST_F(SchedulerTest, DependencyGraph) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - 
HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); - HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry); - graph->AddBlock(block1); - graph->SetEntryBlock(entry); - - // entry: - // array ParameterValue - // c1 IntConstant - // c2 IntConstant - // block1: - // add1 Add [c1, c2] - // add2 Add [add1, c2] - // mul Mul [add1, add2] - // div_check DivZeroCheck [add2] (env: add2, mul) - // div Div [add1, div_check] - // array_get1 ArrayGet [array, add1] - // array_set1 ArraySet [array, add1, add2] - // array_get2 ArrayGet [array, add1] - // array_set2 ArraySet [array, add1, add2] - - HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); - HInstruction* c1 = graph->GetIntConstant(1); - HInstruction* c2 = graph->GetIntConstant(10); - HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2); - HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2); - HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2); - HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0); - HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0); - HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0); - HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); - HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0); - HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); - - DCHECK(div_check->CanThrow()); - - entry->AddInstruction(array); - - HInstruction* block_instructions[] = {add1, - add2, - mul, - div_check, - div, - array_get1, - array_set1, - array_get2, - array_set2}; - for (auto instr : block_instructions) { - block1->AddInstruction(instr); +class SchedulerTest : public CommonCompilerTest { + public: + SchedulerTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); } - HEnvironment* environment = new (&allocator) HEnvironment(&allocator, - 2, - graph->GetArtMethod(), + // Build scheduling graph, and run target specific scheduling on it. 
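The refactoring here converts a free-standing ARM64-only test into a fixture helper that takes the scheduler as a parameter, so the ARM and ARM64 TEST_Fs added further down can share the whole dependency-graph setup; the helper itself follows. For readers unfamiliar with the pattern, here is a self-contained gtest sketch of the same idea, with invented scheduler classes standing in for ART's:

// Standalone sketch of sharing one test body across backends (not ART code).
// Assumes GoogleTest is available; link with -lgtest -lgtest_main -lpthread.
#include <gtest/gtest.h>

class ToyScheduler {
 public:
  virtual ~ToyScheduler() {}
  virtual const char* Name() const = 0;
};

class ToySchedulerArm : public ToyScheduler {
 public:
  const char* Name() const override { return "arm"; }
};

class ToySchedulerArm64 : public ToyScheduler {
 public:
  const char* Name() const override { return "arm64"; }
};

class ToySchedulerTest : public ::testing::Test {
 protected:
  // The shared body depends only on the abstract interface.
  void RunCommonChecks(ToyScheduler* scheduler) {
    ASSERT_TRUE(scheduler != nullptr);
    EXPECT_STRNE(scheduler->Name(), "");
  }
};

TEST_F(ToySchedulerTest, Arm) {
  ToySchedulerArm scheduler;
  RunCommonChecks(&scheduler);
}

TEST_F(ToySchedulerTest, Arm64) {
  ToySchedulerArm64 scheduler;
  RunCommonChecks(&scheduler);
}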
+ void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->AddBlock(block1); + graph_->SetEntryBlock(entry); + + // entry: + // array ParameterValue + // c1 IntConstant + // c2 IntConstant + // block1: + // add1 Add [c1, c2] + // add2 Add [add1, c2] + // mul Mul [add1, add2] + // div_check DivZeroCheck [add2] (env: add2, mul) + // div Div [add1, div_check] + // array_get1 ArrayGet [array, add1] + // array_set1 ArraySet [array, add1, add2] + // array_get2 ArrayGet [array, add1] + // array_set2 ArraySet [array, add1, add2] + + HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), 0, - div_check); - div_check->SetRawEnvironment(environment); - environment->SetRawEnvAt(0, add2); - add2->AddEnvUseAt(div_check->GetEnvironment(), 0); - environment->SetRawEnvAt(1, mul); - mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - - ArenaAllocator* arena = graph->GetArena(); - CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector); - SchedulingGraph scheduling_graph(&scheduler, arena); - // Instructions must be inserted in reverse order into the scheduling graph. - for (auto instr : ReverseRange(block_instructions)) { - scheduling_graph.AddNode(instr); + Primitive::kPrimNot); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(10); + HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2); + HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2); + HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2); + HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0); + HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0); + HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0); + HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); + HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0); + HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); + + DCHECK(div_check->CanThrow()); + + entry->AddInstruction(array); + + HInstruction* block_instructions[] = {add1, + add2, + mul, + div_check, + div, + array_get1, + array_set1, + array_get2, + array_set2}; + for (HInstruction* instr : block_instructions) { + block1->AddInstruction(instr); + } + + HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_, + 2, + graph_->GetArtMethod(), + 0, + div_check); + div_check->SetRawEnvironment(environment); + environment->SetRawEnvAt(0, add2); + add2->AddEnvUseAt(div_check->GetEnvironment(), 0); + environment->SetRawEnvAt(1, mul); + mul->AddEnvUseAt(div_check->GetEnvironment(), 1); + + SchedulingGraph scheduling_graph(scheduler, graph_->GetArena()); + // Instructions must be inserted in reverse order into the scheduling graph. + for (HInstruction* instr : ReverseRange(block_instructions)) { + scheduling_graph.AddNode(instr); + } + + // Should not have dependencies cross basic blocks. + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1)); + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2)); + + // Define-use dependency. 
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1)); + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2)); + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2)); + + // Read and write dependencies + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); + + // Env dependency. + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check)); + + // CanThrow. + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check)); + + // Exercise the code path of target specific scheduler and SchedulingLatencyVisitor. + scheduler->Schedule(graph_); } - // Should not have dependencies cross basic blocks. - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1)); - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2)); - - // Define-use dependency. - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1)); - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2)); - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2)); - - // Read and write dependencies - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); - - // Env dependency. - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul)); - ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check)); - - // CanThrow. - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check)); + void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { + HGraph* graph = CreateCFG(&allocator_, data); + + // Schedule the graph randomly. 
+ HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); + scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); + + RunCode(target_config, + graph, + [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, + has_result, expected); + } + } + + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; +}; + +#if defined(ART_ENABLE_CODEGEN_arm64) +TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { + CriticalPathSchedulingNodeSelector critical_path_selector; + arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector); + TestBuildDependencyGraphAndSchedule(&scheduler); } #endif -static void CompileWithRandomSchedulerAndRun(const uint16_t* data, - bool has_result, - int expected) { - for (CodegenTargetConfig target_config : GetTargetConfigs()) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateCFG(&arena, data); - - // Schedule the graph randomly. - HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); - scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); - - RunCode(target_config, - graph, - [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, - has_result, expected); - } +#if defined(ART_ENABLE_CODEGEN_arm) +TEST_F(SchedulerTest, DependencyGrapAndSchedulerARM) { + CriticalPathSchedulingNodeSelector critical_path_selector; + arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); + arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor); + TestBuildDependencyGraphAndSchedule(&scheduler); } +#endif TEST_F(SchedulerTest, RandomScheduling) { // diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index eedaf6e67e..106b709eda 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -16,6 +16,7 @@ #include "sharpening.h" +#include "art_method-inl.h" #include "base/casts.h" #include "base/enums.h" #include "class_linker.h" @@ -41,7 +42,9 @@ void HSharpening::Run() { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsInvokeStaticOrDirect()) { - SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_); + SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), + codegen_, + compiler_driver_); } else if (instruction->IsLoadString()) { ProcessLoadString(instruction->AsLoadString()); } @@ -56,7 +59,7 @@ static bool IsInBootImage(ArtMethod* method) { const std::vector<gc::space::ImageSpace*>& image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces(); for (gc::space::ImageSpace* image_space : image_spaces) { - const auto& method_section = image_space->GetImageHeader().GetMethodsSection(); + const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection(); if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) { return true; } @@ -68,9 +71,21 @@ static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) return IsInBootImage(method) && !options.GetCompilePic(); } +static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) { + DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); + if (!compiler_driver->GetSupportBootImageFixup()) { + return false; + } + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); + DCHECK(klass != nullptr); + 
const DexFile& dex_file = klass->GetDexFile(); + return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); +} void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen) { + CodeGenerator* codegen, + CompilerDriver* compiler_driver) { if (invoke->IsStringInit()) { // Not using the dex cache arrays. But we could still try to use a better dispatch... // TODO: Use direct_method and direct_code for the appropriate StringFactory method. @@ -108,6 +123,10 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; method_load_data = reinterpret_cast<uintptr_t>(callee); code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (codegen->GetCompilerOptions().IsBootImage() && + BootImageAOTCanEmbedMethod(callee, compiler_driver)) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { // Use PC-relative access to the dex cache arrays. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; @@ -140,7 +159,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); - DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod || + DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) << load_class->GetLoadKind(); DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening."; @@ -166,13 +185,11 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, DCHECK(!runtime->UseJitCompilation()); if (!compiler_driver->GetSupportBootImageFixup()) { // compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; - } else if ((klass != nullptr) && compiler_driver->IsImageClass( - dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; + } else if ((klass != nullptr) && + compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; - desired_load_kind = codegen->GetCompilerOptions().GetCompilePic() - ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative - : HLoadClass::LoadKind::kBootImageLinkTimeAddress; + desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative; } else { // Not a boot image class. DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); @@ -182,8 +199,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get()); if (runtime->UseJitCompilation()) { - // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. - // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); + DCHECK(!codegen->GetCompilerOptions().GetCompilePic()); if (is_in_boot_image) { // TODO: Use direct pointers for all non-moving spaces, not just boot image. 
Bug: 29530787 desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; @@ -194,7 +210,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, // this `HLoadClass` hasn't been executed in the interpreter. // Fallback to the dex cache. // TODO(ngeoffray): Generate HDeoptimize instead. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) { // AOT app compilation. Check if the class is in the boot image. @@ -213,7 +229,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, } if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) { - if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) || + if ((load_kind == HLoadClass::LoadKind::kRuntimeCall) || (load_kind == HLoadClass::LoadKind::kBssEntry)) { // We actually cannot reference this class, we're forced to bail. // We cannot reference this class with Bss, as the entrypoint will lookup the class @@ -225,7 +241,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, } void HSharpening::ProcessLoadString(HLoadString* load_string) { - DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); const DexFile& dex_file = load_string->GetDexFile(); dex::StringIndex string_index = load_string->GetStringIndex(); @@ -249,16 +265,13 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { CHECK(string != nullptr); if (compiler_driver_->GetSupportBootImageFixup()) { DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); - desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() - ? HLoadString::LoadKind::kBootImageLinkTimePcRelative - : HLoadString::LoadKind::kBootImageLinkTimeAddress; + desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; } else { // compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { - // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. - // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); + DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); string = class_linker->LookupString(dex_file, string_index, dex_cache.Get()); if (string != nullptr) { if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { @@ -267,7 +280,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { desired_load_kind = HLoadString::LoadKind::kJitTableAddress; } } else { - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else { // AOT app compilation. Try to lookup the string without allocating if not found. diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 10707c796f..f74b0afdbf 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -55,7 +55,9 @@ class HSharpening : public HOptimization { REQUIRES_SHARED(Locks::mutator_lock_); // Used by Sharpening and InstructionSimplifier. 
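The declaration change that follows mirrors the sharpening.cc hunk above: SharpenInvokeStaticOrDirect now also receives the CompilerDriver so that, when compiling the boot image, it can ask whether the callee's declaring class is an image class before choosing a PC-relative boot-image load for the method. A condensed, self-contained sketch of that decision ladder, with booleans standing in for the real queries (none of these names are ART's):

// Illustrative only: the order of preference for loading the callee,
// simplified from the hunk above.
#include <iostream>

enum class MethodLoadKind {
  kDirectAddress,                // embed the ArtMethod* directly
  kBootImageLinkTimePcRelative,  // PC-relative, patched at link time
  kDexCachePcRelative            // fall back to the dex cache arrays
};

MethodLoadKind ChooseMethodLoadKind(bool callee_in_boot_image,
                                    bool compile_pic,
                                    bool compiling_boot_image,
                                    bool boot_image_can_embed_callee) {
  if (callee_in_boot_image && !compile_pic) {
    return MethodLoadKind::kDirectAddress;
  }
  if (compiling_boot_image && boot_image_can_embed_callee) {
    return MethodLoadKind::kBootImageLinkTimePcRelative;
  }
  return MethodLoadKind::kDexCachePcRelative;
}

int main() {
  bool chose_pc_relative =
      ChooseMethodLoadKind(/*callee_in_boot_image=*/ false,
                           /*compile_pic=*/ true,
                           /*compiling_boot_image=*/ true,
                           /*boot_image_can_embed_callee=*/ true) ==
      MethodLoadKind::kBootImageLinkTimePcRelative;
  std::cout << std::boolalpha << chose_pc_relative << "\n";
  return 0;
}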
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen); + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, + CodeGenerator* codegen, + CompilerDriver* compiler_driver); private: void ProcessLoadString(HLoadString* load_string); diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 0ed8a35338..0f24e81be2 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -652,6 +652,9 @@ class ArmAssembler : public Assembler { virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; + // ADR instruction loading register for branching to the label. + virtual void AdrCode(Register rt, Label* label) = 0; + // Memory barriers. virtual void dmb(DmbOptions flavor) = 0; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 1e71d06b49..d7096b3c87 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -214,14 +214,14 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { DCHECK_GE(dest_end, src_end); for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) { Fixup* fixup = &*i; + size_t old_fixup_location = fixup->GetLocation(); if (fixup->GetOriginalSize() == fixup->GetSize()) { // The size of this Fixup didn't change. To avoid moving the data // in small chunks, emit the code to its original position. - fixup->Emit(&buffer_, adjusted_code_size); fixup->Finalize(dest_end - src_end); + fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size); } else { // Move the data between the end of the fixup and src_end to its final location. - size_t old_fixup_location = fixup->GetLocation(); size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes(); size_t data_size = src_end - src_begin; size_t dest_begin = dest_end - data_size; @@ -230,7 +230,7 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { dest_end = dest_begin - fixup->GetSizeInBytes(); // Finalize the Fixup and emit the data to the new location. fixup->Finalize(dest_end - src_end); - fixup->Emit(&buffer_, adjusted_code_size); + fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size); } } CHECK_EQ(src_end, dest_end); @@ -1895,6 +1895,9 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { case kCbxz48Bit: return 6u; + case kCodeAddr4KiB: + return 4u; + case kLiteral1KiB: return 2u; case kLiteral4KiB: @@ -1973,6 +1976,15 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con diff -= 2; // Extra CMP Rn, #0, 16-bit. break; + case kCodeAddr4KiB: + // The ADR instruction rounds down the PC+4 to a multiple of 4, so if the PC + // isn't a multiple of 2, we need to adjust. + DCHECK_ALIGNED(diff, 2); + diff += location_ & 2; + // Add the Thumb mode bit. + diff += 1; + break; + case kLiteral1KiB: case kLiteral4KiB: case kLongOrFPLiteral1KiB: @@ -1987,8 +1999,8 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con diff = diff + (diff & 2); DCHECK_GE(diff, 0); break; - case kLiteral1MiB: case kLiteral64KiB: + case kLiteral1MiB: case kLongOrFPLiteral64KiB: case kLiteralAddr64KiB: DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. @@ -2041,6 +2053,10 @@ bool Thumb2Assembler::Fixup::IsCandidateForEmitEarly() const { // We don't support conditional branches beyond +-1MiB. 
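The kCodeAddr4KiB case of Fixup::GetOffset() above documents the ADR quirk this patch relies on: the instruction's base is AlignDown(PC + 4, 4), and the produced code address must carry the Thumb mode bit. The small standalone check below replays that arithmetic; the offsets are hypothetical.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Replays the kCodeAddr4KiB arithmetic from Fixup::GetOffset() above.
int32_t AdrCodeImmediate(uint32_t adr_location, uint32_t target_location) {
  int32_t diff = static_cast<int32_t>(target_location) -
                 static_cast<int32_t>(adr_location + 4u);  // Thumb PC is the instruction + 4.
  assert((diff & 1) == 0);
  diff += adr_location & 2u;  // ADR uses AlignDown(PC + 4, 4) as its base.
  diff += 1;                  // Set the Thumb mode bit in the produced address.
  return diff;
}

int main() {
  // An ADR at offset 0x102 targeting a label at offset 0x180:
  // base = AlignDown(0x106, 4) = 0x104, so the immediate is 0x180 - 0x104 + 1 = 0x7D.
  std::printf("immediate = 0x%x\n", AdrCodeImmediate(0x102, 0x180));
  return 0;
}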
return true; + case kCodeAddr4KiB: + // ADR uses the aligned PC and as such the offset cannot be calculated early. + return false; + case kLiteral1KiB: case kLiteral4KiB: case kLiteral64KiB: @@ -2087,6 +2103,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) // We don't support conditional branches beyond +-1MiB. break; + case kCodeAddr4KiB: + // We don't support Code address ADR beyond +4KiB. + break; + case kLiteral1KiB: DCHECK(!IsHighRegister(rn_)); if (IsUint<10>(GetOffset(current_code_size))) { @@ -2159,13 +2179,15 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) return current_code_size - old_code_size; } -void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { +void Thumb2Assembler::Fixup::Emit(uint32_t emit_location, + AssemblerBuffer* buffer, + uint32_t code_size) const { switch (GetSize()) { case kBranch16Bit: { DCHECK(type_ == kUnconditional || type_ == kConditional); DCHECK_EQ(type_ == kConditional, cond_ != AL); int16_t encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kBranch32Bit: { @@ -2180,15 +2202,15 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK_NE(encoding & B12, 0); encoding ^= B14 | B12; } - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kCbxz16Bit: { DCHECK(type_ == kCompareAndBranchXZero); int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kCbxz32Bit: { @@ -2196,8 +2218,8 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2, b_encoding); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2, b_encoding); break; } case kCbxz48Bit: { @@ -2205,24 +2227,32 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); - buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16); + buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kCodeAddr4KiB: { + DCHECK(type_ == kLoadCodeAddr); + int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral1KiB: { DCHECK(type_ == kLoadLiteralNarrow); int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + 
buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteral4KiB: { DCHECK(type_ == kLoadLiteralNarrow); // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral64KiB: { @@ -2242,11 +2272,11 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralFar: { @@ -2256,36 +2286,36 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralAddr1KiB: { DCHECK(type_ == kLoadLiteralAddr); int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteralAddr4KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, 
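The mechanical replacements above exist because Fixup::Emit() no longer reads location_ implicitly: judging by the EmitFixups() hunk, Finalize() slides location_ to its final position, and for fixups whose size did not change the caller wants to emit at the original position, so the write offset is now an explicit parameter captured before Finalize() runs. A toy illustration of that ordering (types and sizes invented):

#include <cstdint>
#include <cstdio>

// Toy fixup: only enough state to show why the emit position is now a parameter.
struct ToyFixup {
  uint32_t location_;                                    // Current position of the instruction.
  void Finalize(int32_t delta) { location_ += delta; }   // Slide to the final position.
  void Emit(uint32_t emit_location) const {              // Write at an explicit offset.
    std::printf("emit at 0x%x (location_ is now 0x%x)\n", emit_location, location_);
  }
};

int main() {
  ToyFixup fixup{0x40};
  uint32_t old_location = fixup.location_;  // Capture before Finalize(), as EmitFixups() does.
  fixup.Finalize(0x10);                     // location_ becomes 0x50 ...
  fixup.Emit(old_location);                 // ... but the unchanged-size fixup is emitted at 0x40.
  return 0;
}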
static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteralAddr64KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); break; } case kLiteralAddrFar: { @@ -2294,29 +2324,29 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); break; } case kLongOrFPLiteral1KiB: { int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLongOrFPLiteral64KiB: { int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_. - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLongOrFPLiteralFar: { @@ -2325,13 +2355,13 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
- buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } } @@ -3331,6 +3361,19 @@ void Thumb2Assembler::bx(Register rm, Condition cond) { } +void Thumb2Assembler::AdrCode(Register rt, Label* label) { + uint32_t pc = buffer_.Size(); + FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt)); + CHECK(!label->IsBound()); + // ADR target must be an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); + Emit16(0); + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); +} + + void Thumb2Assembler::Push(Register rd, Condition cond) { str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond); } @@ -3405,7 +3448,7 @@ void Thumb2Assembler::Bind(Label* label) { break; } } - last_fixup.Emit(&buffer_, buffer_.Size()); + last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size()); fixups_.pop_back(); } } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 1c495aa7a7..2ff9018510 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -268,6 +268,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { void blx(Register rm, Condition cond = AL) OVERRIDE; void bx(Register rm, Condition cond = AL) OVERRIDE; + // ADR instruction loading register for branching to the label, including the Thumb mode bit. + void AdrCode(Register rt, Label* label) OVERRIDE; + virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, @@ -377,6 +380,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { force_32bit_ = true; } + void Allow16Bit() { + force_32bit_ = false; + } + // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This // will generate a fixup. JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; @@ -422,6 +429,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { kUnconditionalLink, // BL. kUnconditionalLinkX, // BLX. kCompareAndBranchXZero, // cbz/cbnz. + kLoadCodeAddr, // Get address of a code label, used for Baker read barriers. kLoadLiteralNarrow, // Load narrrow integer literal. kLoadLiteralWide, // Load wide integer literal. kLoadLiteralAddr, // Load address of literal (used for jump table). 
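AdrCode() above reuses the assembler's usual trick for unbound labels: the 16-bit slot that will eventually hold the instruction temporarily stores the label's previous link (its old position_), and the label is then pointed at the new fixup, forming a singly linked chain threaded through the code that binding can later walk. The toy model below shows only the chain; it keeps the link in a side structure rather than in the code slot, and the names are invented.

#include <cstdio>
#include <vector>

// Toy version of the linked-label scheme: each pending fixup records the id of
// the previously linked fixup, and the label stores the id of the newest one.
struct ToyLabel {
  int head = -1;  // -1 means "no pending fixups yet".
};

struct ToyFixup {
  int prev;  // Previous pending fixup (kept in the instruction slot in the real assembler).
};

std::vector<ToyFixup> fixups;

void LinkTo(ToyLabel* label) {
  int id = static_cast<int>(fixups.size());
  fixups.push_back(ToyFixup{label->head});  // Remember the old head in the new fixup.
  label->head = id;                         // The label now points at the newest fixup.
}

void Bind(ToyLabel* label) {
  // Walk the chain from newest to oldest, roughly as the assembler's Bind() does.
  for (int id = label->head; id != -1; id = fixups[id].prev) {
    std::printf("resolving fixup %d\n", id);
  }
  label->head = -1;
}

int main() {
  ToyLabel label;
  LinkTo(&label);  // e.g. first AdrCode() referencing the label.
  LinkTo(&label);  // a second reference.
  Bind(&label);    // prints 1 then 0.
  return 0;
}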
@@ -442,6 +450,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. + // ADR variants. + kCodeAddr4KiB, // ADR rX, <label>; label must be after the ADR but within 4KiB range. + // Multi-instruction expansion is not supported. + // Load integer literal variants. // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. kLiteral1KiB, @@ -492,6 +504,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { cond, kCompareAndBranchXZero, kCbxz16Bit, location); } + // Code address. + static Fixup LoadCodeAddress(uint32_t location, Register rt) { + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadCodeAddr, kCodeAddr4KiB, location); + } + // Load narrow literal. static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || @@ -550,6 +568,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { switch (GetOriginalSize()) { case kBranch32Bit: case kCbxz48Bit: + case kCodeAddr4KiB: case kLiteralFar: case kLiteralAddrFar: case kLongOrFPLiteralFar: @@ -623,7 +642,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Emit the branch instruction into the assembler buffer. This does the // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const; private: Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, @@ -903,6 +922,26 @@ class Thumb2Assembler FINAL : public ArmAssembler { FixupId last_fixup_id_; }; +class ScopedForce32Bit { + public: + explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true) + : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) { + if (force) { + assembler->Force32Bit(); + } + } + + ~ScopedForce32Bit() { + if (!old_force_32bit_) { + assembler_->Allow16Bit(); + } + } + + private: + Thumb2Assembler* const assembler_; + const bool old_force_32bit_; +}; + } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index f8c4008b45..eaaf81518a 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5535,7 +5535,7 @@ const char* const VixlJniHelpersResults[] = { " f0: f1bc 0f00 cmp.w ip, #0\n", " f4: bf18 it ne\n", " f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n", - " fa: f8d9 c084 ldr.w ip, [r9, #132] ; 0x84\n", + " fa: f8d9 c08c ldr.w ip, [r9, #140] ; 0x8c\n", " fe: f1bc 0f00 cmp.w ip, #0\n", " 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n", " 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", @@ -5610,7 +5610,7 @@ const char* const VixlJniHelpersResults[] = { " 214: ecbd 8a10 vpop {s16-s31}\n", " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", " 21c: 4660 mov r0, ip\n", - " 21e: f8d9 c2b8 ldr.w ip, [r9, #696] ; 0x2b8\n", + " 21e: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n", " 222: 47e0 blx ip\n", nullptr }; diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc index 4c0979e0b7..b390508ed4 100644 --- a/compiler/utils/dedupe_set_test.cc +++ b/compiler/utils/dedupe_set_test.cc @@ -23,7 +23,7 @@ #include "base/array_ref.h" #include "dedupe_set-inl.h" #include "gtest/gtest.h" -#include "thread-inl.h" +#include "thread-current-inl.h" 
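ScopedForce32Bit above is an RAII guard: it forces 32-bit Thumb encodings for its lifetime and restores 16-bit encodings on destruction only if they were allowed beforehand. The runnable sketch below demonstrates the behavior against a stand-in assembler class; the real guard wraps Thumb2Assembler.

#include <cstdio>

// Toy stand-in for the assembler: just the flag the guard manipulates.
class ToyAssembler {
 public:
  bool IsForced32Bit() const { return force_32bit_; }
  void Force32Bit() { force_32bit_ = true; }
  void Allow16Bit() { force_32bit_ = false; }
 private:
  bool force_32bit_ = false;
};

// Same shape as ScopedForce32Bit in the hunk above.
class ScopedForce32Bit {
 public:
  explicit ScopedForce32Bit(ToyAssembler* assembler, bool force = true)
      : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
    if (force) {
      assembler->Force32Bit();
    }
  }
  ~ScopedForce32Bit() {
    if (!old_force_32bit_) {
      assembler_->Allow16Bit();
    }
  }
 private:
  ToyAssembler* const assembler_;
  const bool old_force_32bit_;
};

int main() {
  ToyAssembler assembler;
  {
    ScopedForce32Bit force(&assembler);
    std::printf("inside guard: forced = %d\n", assembler.IsForced32Bit());  // 1
  }
  std::printf("after guard:  forced = %d\n", assembler.IsForced32Bit());    // 0
  return 0;
}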
namespace art { diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 0f82ad5ff1..4c6ae8e218 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -29,24 +29,24 @@ class AssemblerFixup; namespace arm { class ArmAssembler; class Thumb2Assembler; -} +} // namespace arm namespace arm64 { class Arm64Assembler; -} +} // namespace arm64 namespace mips { class MipsAssembler; -} +} // namespace mips namespace mips64 { class Mips64Assembler; -} +} // namespace mips64 namespace x86 { class X86Assembler; class NearLabel; -} +} // namespace x86 namespace x86_64 { class X86_64Assembler; class NearLabel; -} +} // namespace x86_64 class ExternalLabel { public: diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h index 184cdf5050..2b7b2aa7ce 100644 --- a/compiler/utils/managed_register.h +++ b/compiler/utils/managed_register.h @@ -26,24 +26,24 @@ namespace art { namespace arm { class ArmManagedRegister; -} +} // namespace arm namespace arm64 { class Arm64ManagedRegister; -} +} // namespace arm64 namespace mips { class MipsManagedRegister; -} +} // namespace mips namespace mips64 { class Mips64ManagedRegister; -} +} // namespace mips64 namespace x86 { class X86ManagedRegister; -} +} // namespace x86 namespace x86_64 { class X86_64ManagedRegister; -} +} // namespace x86_64 class ManagedRegister : public ValueObject { public: diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 57223b52a3..b8b800abe3 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -1356,6 +1356,106 @@ void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegiste EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); } +void Mips64Assembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + 
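Each of the new MSA helpers above forwards to EmitMsa3R() with a 3-bit operation code, a 2-bit data-format selector (0-3 for .b/.h/.w/.d) and a 6-bit minor opcode. The sketch below packs those fields, assuming the standard MIPS MSA 3R layout (major opcode 0x1E in bits 31:26, then operation, df, wt, ws, wd, minor); it mirrors the calls above but is not the ART emitter.

#include <cstdint>
#include <cstdio>

// Assumed MSA 3R layout: | 011110 | op:3 | df:2 | wt:5 | ws:5 | wd:5 | minor:6 |
uint32_t EncodeMsa3R(uint32_t op, uint32_t df, uint32_t wt, uint32_t ws, uint32_t wd,
                     uint32_t minor) {
  return (0x1Eu << 26) | (op << 23) | (df << 21) | (wt << 16) | (ws << 11) | (wd << 6) | minor;
}

int main() {
  // ave_s.w $w0, $w1, $w2 corresponds to EmitMsa3R(0x4, 0x2, wt=2, ws=1, wd=0, 0x10) above.
  std::printf("ave_s.w w0, w1, w2 -> 0x%08x\n", EncodeMsa3R(0x4, 0x2, 2, 1, 0, 0x10));
  return 0;
}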
CHECK(HasMsa()); + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10); +} + void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); @@ -1675,6 +1775,37 @@ void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) { EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3); } +void Mips64Assembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14); +} + +void Mips64Assembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14); +} + +void Mips64Assembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14); +} + +void Mips64Assembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14); +} + +void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst, + FpuRegister src, + bool is_double) { + // Float or double in FPU register Fx can be considered as 0th element in vector register Wx. + if (is_double) { + SplatiD(dst, static_cast<VectorRegister>(src), 0); + } else { + SplatiW(dst, static_cast<VectorRegister>(src), 0); + } +} + void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) { TemplateLoadConst32(this, rd, value); } @@ -2702,6 +2833,94 @@ void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base, CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1)); } +void Mips64Assembler::AdjustBaseOffsetAndElementSizeShift(GpuRegister& base, + int32_t& offset, + int& element_size_shift) { + // This method is used to adjust the base register, offset and element_size_shift + // for a vector load/store when the offset doesn't fit into allowed number of bits. + // MSA ld.df and st.df instructions take signed offsets as arguments, but maximum + // offset is dependant on the size of the data format df (10-bit offsets for ld.b, + // 11-bit for ld.h, 12-bit for ld.w and 13-bit for ld.d). + // If element_size_shift is non-negative at entry, it won't be changed, but offset + // will be checked for appropriate alignment. 
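The comment opening AdjustBaseOffsetAndElementSizeShift() above states the key constraint: ld.df/st.df encode a signed 10-bit immediate scaled by the element size, so the directly reachable byte range grows with the data format (10 bits for ld.b up to 13 bits for ld.d). The small check below tests whether an offset fits without adjusting the base; the function name is hypothetical.

#include <cstdio>

// The MSA ld.df/st.df immediate is a signed 10-bit value scaled by the element
// size (TIMES_1..TIMES_8 above), so an offset is directly encodable only if it
// is element-aligned and the scaled value fits in 10 signed bits.
bool FitsMsaLoadStore(int offset, int element_size_shift) {
  int scaled = offset >> element_size_shift;
  bool aligned = (scaled << element_size_shift) == offset;
  return aligned && scaled >= -512 && scaled <= 511;
}

int main() {
  std::printf("ld.b  511: %d\n", FitsMsaLoadStore(511, 0));   // 1 -> ld.b $w0, 511($a0)
  std::printf("ld.b  513: %d\n", FitsMsaLoadStore(513, 0));   // 0 -> needs a daddiu first
  std::printf("ld.d 4088: %d\n", FitsMsaLoadStore(4088, 3));  // 1 -> ld.d $w0, 4088($a0)
  std::printf("ld.d 4096: %d\n", FitsMsaLoadStore(4096, 3));  // 0 -> base must be adjusted
  return 0;
}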
If negative at entry, it will be + // adjusted based on offset for maximum fit. + // It's assumed that `base` is a multiple of 8. + + CHECK_NE(base, AT); // Must not overwrite the register `base` while loading `offset`. + + if (element_size_shift >= 0) { + CHECK_LE(element_size_shift, TIMES_8); + CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift); + } else if (IsAligned<kMips64DoublewordSize>(offset)) { + element_size_shift = TIMES_8; + } else if (IsAligned<kMips64WordSize>(offset)) { + element_size_shift = TIMES_4; + } else if (IsAligned<kMips64HalfwordSize>(offset)) { + element_size_shift = TIMES_2; + } else { + element_size_shift = TIMES_1; + } + + const int low_len = 10 + element_size_shift; // How many low bits of `offset` ld.df/st.df + // will take. + int16_t low = offset & ((1 << low_len) - 1); // Isolate these bits. + low -= (low & (1 << (low_len - 1))) << 1; // Sign-extend these bits. + if (low == offset) { + return; // `offset` fits into ld.df/st.df. + } + + // First, see if `offset` can be represented as a sum of two signed offsets. + // This can save an instruction. + + // Max int16_t that's a multiple of element size. + const int32_t kMaxDeltaForSimpleAdjustment = 0x8000 - (1 << element_size_shift); + // Max ld.df/st.df offset that's a multiple of element size. + const int32_t kMaxLoadStoreOffset = 0x1ff << element_size_shift; + const int32_t kMaxOffsetForSimpleAdjustment = kMaxDeltaForSimpleAdjustment + kMaxLoadStoreOffset; + + if (IsInt<16>(offset)) { + Daddiu(AT, base, offset); + offset = 0; + } else if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) { + Daddiu(AT, base, kMaxDeltaForSimpleAdjustment); + offset -= kMaxDeltaForSimpleAdjustment; + } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) { + Daddiu(AT, base, -kMaxDeltaForSimpleAdjustment); + offset += kMaxDeltaForSimpleAdjustment; + } else { + // Let's treat `offset` as 64-bit to simplify handling of sign + // extensions in the instructions that supply its smaller signed parts. + // + // 16-bit or smaller parts of `offset`: + // |63 top 48|47 hi 32|31 upper 16|15 mid 13-10|12-9 low 0| + // + // Instructions that supply each part as a signed integer addend: + // |dati |dahi |daui |daddiu |ld.df/st.df | + // + // `top` is always 0, so dati isn't used. + // `hi` is 1 when `offset` is close to +2GB and 0 otherwise. + uint64_t tmp = static_cast<uint64_t>(offset) - low; // Exclude `low` from the rest of `offset` + // (accounts for sign of `low`). + tmp += (tmp & (UINT64_C(1) << 15)) << 1; // Account for sign extension in daddiu. + tmp += (tmp & (UINT64_C(1) << 31)) << 1; // Account for sign extension in daui. 
+ int16_t mid = Low16Bits(tmp); + int16_t upper = High16Bits(tmp); + int16_t hi = Low16Bits(High32Bits(tmp)); + Daui(AT, base, upper); + if (hi != 0) { + CHECK_EQ(hi, 1); + Dahi(AT, hi); + } + if (mid != 0) { + Daddiu(AT, AT, mid); + } + offset = low; + } + base = AT; + CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift); + CHECK(IsInt<10>(offset >> element_size_shift)); +} + void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 666c6935a1..9b4064543f 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -278,14 +278,16 @@ enum LoadOperandType { kLoadUnsignedHalfword, kLoadWord, kLoadUnsignedWord, - kLoadDoubleword + kLoadDoubleword, + kLoadQuadword }; enum StoreOperandType { kStoreByte, kStoreHalfword, kStoreWord, - kStoreDoubleword + kStoreDoubleword, + kStoreQuadword }; // Used to test the values returned by ClassS/ClassD. @@ -682,6 +684,26 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); @@ -747,6 +769,14 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void StW(VectorRegister wd, GpuRegister rs, int offset); void StD(VectorRegister wd, GpuRegister rs, int offset); + void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + // Helper for replicating floating point value in all destination elements. 
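The comment block in the function above explains how a far offset is split across daui, an optional dahi, daddiu and the ld.df immediate, with each instruction sign-extending its addend. The arithmetic check below reconstructs the offset from the sequence the new test expects for LoadFpuFromOffset(kLoadQuadword, F0, A0, 0x7FFFABCD), i.e. daui 0x8000, dahi 1, daddiu -21504, ld.b -51; the sign-extension of the daui/dahi addends follows my reading of the MIPS64 ISA and is stated here as an assumption.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t daui   = -(int64_t{1} << 31);  // daui 0x8000 adds sign_extend(0x8000 << 16) = -0x80000000.
  int64_t dahi   = int64_t{1} << 32;     // dahi 1 adds 1 << 32.
  int64_t daddiu = -21504;               // 0xAC00 sign-extends to -0x5400.
  int64_t low    = -51;                  // ld.b immediate, printed as "# 0xFFCD" in the test.
  int64_t offset = daui + dahi + daddiu + low;
  std::printf("reconstructed offset = 0x%llx\n", static_cast<unsigned long long>(offset));
  assert(offset == 0x7FFFABCD);
  return 0;
}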
+ void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double); + // Higher level composite instructions. int InstrCountForLoadReplicatedConst32(int64_t); void LoadConst32(GpuRegister rd, int32_t value); @@ -876,6 +906,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword); + // If element_size_shift is negative at entry, its value will be calculated based on the offset. + void AdjustBaseOffsetAndElementSizeShift(GpuRegister& base, + int32_t& offset, + int& element_size_shift); private: // This will be used as an argument for loads/stores @@ -999,6 +1033,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer null_checker(); } break; + default: + LOG(FATAL) << "UNREACHABLE"; } if (type != kLoadDoubleword) { null_checker(); @@ -1011,7 +1047,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer GpuRegister base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + int element_size_shift = -1; + if (type != kLoadQuadword) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + } else { + AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift); + } switch (type) { case kLoadWord: @@ -1031,6 +1072,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer null_checker(); } break; + case kLoadQuadword: + switch (element_size_shift) { + case TIMES_1: LdB(static_cast<VectorRegister>(reg), base, offset); break; + case TIMES_2: LdH(static_cast<VectorRegister>(reg), base, offset); break; + case TIMES_4: LdW(static_cast<VectorRegister>(reg), base, offset); break; + case TIMES_8: LdD(static_cast<VectorRegister>(reg), base, offset); break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + null_checker(); + break; default: LOG(FATAL) << "UNREACHABLE"; } @@ -1084,7 +1136,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer GpuRegister base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + int element_size_shift = -1; + if (type != kStoreQuadword) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + } else { + AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift); + } switch (type) { case kStoreWord: @@ -1104,6 +1161,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer null_checker(); } break; + case kStoreQuadword: + switch (element_size_shift) { + case TIMES_1: StB(static_cast<VectorRegister>(reg), base, offset); break; + case TIMES_2: StH(static_cast<VectorRegister>(reg), base, offset); break; + case TIMES_4: StW(static_cast<VectorRegister>(reg), base, offset); break; + case TIMES_8: StD(static_cast<VectorRegister>(reg), base, offset); break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + null_checker(); + break; default: LOG(FATAL) << "UNREACHABLE"; } diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index f2e3b1610c..fbebe0ce15 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc 
@@ -1970,6 +1970,50 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) { __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768); __ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 8); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 511); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 512); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 513); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 514); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 516); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1022); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1024); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1025); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1026); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1028); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2044); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2048); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2049); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2050); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2052); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4088); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4096); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4097); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4098); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4100); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4104); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFC); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x8000); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x10000); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12345678); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12350078); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -256); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -511); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -513); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1022); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1026); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2044); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2052); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4096); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4104); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -32768); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0xABCDEF00); + __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 
0x7FFFABCD); + const char* expected = "lwc1 $f0, 0($a0)\n" "lwc1 $f0, 4($a0)\n" @@ -2010,7 +2054,78 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) { "ldc1 $f0, -256($a0)\n" "ldc1 $f0, -32768($a0)\n" "daui $at, $a0, 0xABCE\n" - "ldc1 $f0, -0x1100($at) # 0xEF00\n"; + "ldc1 $f0, -0x1100($at) # 0xEF00\n" + + "ld.d $w0, 0($a0)\n" + "ld.b $w0, 1($a0)\n" + "ld.h $w0, 2($a0)\n" + "ld.w $w0, 4($a0)\n" + "ld.d $w0, 8($a0)\n" + "ld.b $w0, 511($a0)\n" + "ld.d $w0, 512($a0)\n" + "daddiu $at, $a0, 513\n" + "ld.b $w0, 0($at)\n" + "ld.h $w0, 514($a0)\n" + "ld.w $w0, 516($a0)\n" + "ld.h $w0, 1022($a0)\n" + "ld.d $w0, 1024($a0)\n" + "daddiu $at, $a0, 1025\n" + "ld.b $w0, 0($at)\n" + "daddiu $at, $a0, 1026\n" + "ld.h $w0, 0($at)\n" + "ld.w $w0, 1028($a0)\n" + "ld.w $w0, 2044($a0)\n" + "ld.d $w0, 2048($a0)\n" + "daddiu $at, $a0, 2049\n" + "ld.b $w0, 0($at)\n" + "daddiu $at, $a0, 2050\n" + "ld.h $w0, 0($at)\n" + "daddiu $at, $a0, 2052\n" + "ld.w $w0, 0($at)\n" + "ld.d $w0, 4088($a0)\n" + "daddiu $at, $a0, 4096\n" + "ld.d $w0, 0($at)\n" + "daddiu $at, $a0, 4097\n" + "ld.b $w0, 0($at)\n" + "daddiu $at, $a0, 4098\n" + "ld.h $w0, 0($at)\n" + "daddiu $at, $a0, 4100\n" + "ld.w $w0, 0($at)\n" + "daddiu $at, $a0, 4104\n" + "ld.d $w0, 0($at)\n" + "daddiu $at, $a0, 0x7FFC\n" + "ld.w $w0, 0($at)\n" + "daddiu $at, $a0, 0x7FF8\n" + "ld.d $w0, 8($at)\n" + "daui $at, $a0, 0x1\n" + "ld.d $w0, 0($at)\n" + "daui $at, $a0, 0x1234\n" + "daddiu $at, $at, 0x6000\n" + "ld.d $w0, -2440($at) # 0xF678\n" + "daui $at, $a0, 0x1235\n" + "ld.d $w0, 0x78($at)\n" + "ld.d $w0, -256($a0)\n" + "ld.b $w0, -511($a0)\n" + "daddiu $at, $a0, -513\n" + "ld.b $w0, 0($at)\n" + "ld.h $w0, -1022($a0)\n" + "daddiu $at, $a0, -1026\n" + "ld.h $w0, 0($at)\n" + "ld.w $w0, -2044($a0)\n" + "daddiu $at, $a0, -2052\n" + "ld.w $w0, 0($at)\n" + "ld.d $w0, -4096($a0)\n" + "daddiu $at, $a0, -4104\n" + "ld.d $w0, 0($at)\n" + "daddiu $at, $a0, -32768\n" + "ld.d $w0, 0($at)\n" + "daui $at, $a0, 0xABCE\n" + "daddiu $at, $at, -8192 # 0xE000\n" + "ld.d $w0, 0xF00($at)\n" + "daui $at, $a0, 0x8000\n" + "dahi $at, $at, 1\n" + "daddiu $at, $at, -21504 # 0xAC00\n" + "ld.b $w0, -51($at) # 0xFFCD\n"; DriverStr(expected, "LoadFpuFromOffset"); } @@ -2200,6 +2315,50 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) { __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768); __ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 8); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 511); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 512); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 513); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 514); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 516); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1022); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1024); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1025); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1026); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, 
mips64::A0, 1028); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2044); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2048); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2049); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2050); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2052); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4088); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4096); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4097); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4098); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4100); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4104); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFC); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x8000); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x10000); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12345678); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12350078); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -256); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -511); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -513); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1022); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1026); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2044); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2052); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4096); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4104); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -32768); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0xABCDEF00); + __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFFABCD); + const char* expected = "swc1 $f0, 0($a0)\n" "swc1 $f0, 4($a0)\n" @@ -2240,7 +2399,78 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) { "sdc1 $f0, -256($a0)\n" "sdc1 $f0, -32768($a0)\n" "daui $at, $a0, 0xABCE\n" - "sdc1 $f0, -0x1100($at)\n"; + "sdc1 $f0, -0x1100($at)\n" + + "st.d $w0, 0($a0)\n" + "st.b $w0, 1($a0)\n" + "st.h $w0, 2($a0)\n" + "st.w $w0, 4($a0)\n" + "st.d $w0, 8($a0)\n" + "st.b $w0, 511($a0)\n" + "st.d $w0, 512($a0)\n" + "daddiu $at, $a0, 513\n" + "st.b $w0, 0($at)\n" + "st.h $w0, 514($a0)\n" + "st.w $w0, 516($a0)\n" + "st.h $w0, 1022($a0)\n" + "st.d $w0, 1024($a0)\n" + "daddiu $at, $a0, 1025\n" + "st.b $w0, 0($at)\n" + "daddiu $at, $a0, 1026\n" + "st.h $w0, 0($at)\n" + "st.w $w0, 1028($a0)\n" + "st.w $w0, 2044($a0)\n" + "st.d $w0, 2048($a0)\n" + "daddiu $at, $a0, 2049\n" + "st.b $w0, 0($at)\n" + "daddiu $at, $a0, 2050\n" + "st.h $w0, 0($at)\n" + "daddiu $at, $a0, 2052\n" + "st.w $w0, 0($at)\n" + "st.d $w0, 4088($a0)\n" + "daddiu $at, $a0, 4096\n" + "st.d $w0, 0($at)\n" + "daddiu $at, $a0, 4097\n" + "st.b $w0, 0($at)\n" + "daddiu $at, $a0, 4098\n" + "st.h $w0, 0($at)\n" + "daddiu $at, $a0, 4100\n" + "st.w $w0, 0($at)\n" + "daddiu $at, $a0, 4104\n" + "st.d $w0, 0($at)\n" + "daddiu $at, $a0, 0x7FFC\n" + "st.w $w0, 0($at)\n" + "daddiu $at, $a0, 0x7FF8\n" + "st.d $w0, 8($at)\n" + "daui $at, $a0, 0x1\n" + 
"st.d $w0, 0($at)\n" + "daui $at, $a0, 0x1234\n" + "daddiu $at, $at, 0x6000\n" + "st.d $w0, -2440($at) # 0xF678\n" + "daui $at, $a0, 0x1235\n" + "st.d $w0, 0x78($at)\n" + "st.d $w0, -256($a0)\n" + "st.b $w0, -511($a0)\n" + "daddiu $at, $a0, -513\n" + "st.b $w0, 0($at)\n" + "st.h $w0, -1022($a0)\n" + "daddiu $at, $a0, -1026\n" + "st.h $w0, 0($at)\n" + "st.w $w0, -2044($a0)\n" + "daddiu $at, $a0, -2052\n" + "st.w $w0, 0($at)\n" + "st.d $w0, -4096($a0)\n" + "daddiu $at, $a0, -4104\n" + "st.d $w0, 0($at)\n" + "daddiu $at, $a0, -32768\n" + "st.d $w0, 0($at)\n" + "daui $at, $a0, 0xABCE\n" + "daddiu $at, $at, -8192 # 0xE000\n" + "st.d $w0, 0xF00($at)\n" + "daui $at, $a0, 0x8000\n" + "dahi $at, $at, 1\n" + "daddiu $at, $at, -21504 # 0xAC00\n" + "st.b $w0, -51($at) # 0xFFCD\n"; DriverStr(expected, "StoreFpuToOffset"); } @@ -2668,6 +2898,106 @@ TEST_F(AssemblerMIPS64Test, Mod_uD) { "mod_u.d"); } +TEST_F(AssemblerMIPS64Test, Add_aB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"), + "add_a.b"); +} + +TEST_F(AssemblerMIPS64Test, Add_aH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"), + "add_a.h"); +} + +TEST_F(AssemblerMIPS64Test, Add_aW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"), + "add_a.w"); +} + +TEST_F(AssemblerMIPS64Test, Add_aD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"), + "add_a.d"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"), + "ave_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"), + "ave_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"), + "ave_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"), + "ave_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"), + "ave_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"), + "ave_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"), + "ave_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"), + "ave_u.d"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"), + "aver_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"), + "aver_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"), + "aver_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"), + "aver_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"), + "aver_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uH) { + 
DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"), + "aver_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"), + "aver_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"), + "aver_u.d"); +} + TEST_F(AssemblerMIPS64Test, FaddW) { DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), "fadd.w"); @@ -2890,6 +3220,26 @@ TEST_F(AssemblerMIPS64Test, StD) { "st.d"); } +TEST_F(AssemblerMIPS64Test, IlvrB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"), + "ilvr.b"); +} + +TEST_F(AssemblerMIPS64Test, IlvrH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrH, "ilvr.h ${reg1}, ${reg2}, ${reg3}"), + "ilvr.h"); +} + +TEST_F(AssemblerMIPS64Test, IlvrW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrW, "ilvr.w ${reg1}, ${reg2}, ${reg3}"), + "ilvr.w"); +} + +TEST_F(AssemblerMIPS64Test, IlvrD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"), + "ilvr.d"); +} + #undef __ } // namespace art diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index a1eb08e041..4f6c915142 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -23,7 +23,7 @@ #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" -#include "thread-inl.h" +#include "thread-current-inl.h" namespace art { diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index c286b820fe..0ff9fc69ed 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -78,7 +78,7 @@ class SwapSpace { mutable FreeByStartSet::const_iterator free_by_start_entry; }; struct FreeBySizeComparator { - bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) { + bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const { if (lhs.size != rhs.size) { return lhs.size < rhs.size; } else { diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h deleted file mode 100644 index a0fa1a4a63..0000000000 --- a/compiler/utils/type_reference.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_ -#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_ - -#include <stdint.h> - -#include "base/logging.h" -#include "dex_file_types.h" -#include "string_reference.h" - -namespace art { - -class DexFile; - -// A type is located by its DexFile and the string_ids_ table index into that DexFile. -struct TypeReference { - TypeReference(const DexFile* file, dex::TypeIndex index) : dex_file(file), type_index(index) { } - - const DexFile* dex_file; - dex::TypeIndex type_index; -}; - -// Compare the actual referenced type names. 
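The swap_space.h hunk above adds const to FreeBySizeComparator::operator(). The comparator object of a std::set/std::multiset is stored inside the container and invoked through const paths, and newer libc++ versions reject comparators whose call operator is not const-qualified. A minimal illustration with an invented comparator:

#include <set>

// Invented example: a comparator used with std::multiset should be callable on
// a const comparator object, hence the const added to operator() in the hunk above.
struct SizeComparator {
  bool operator()(int lhs, int rhs) const {  // Without const, libc++ can reject the multiset.
    return lhs < rhs;
  }
};

int main() {
  std::multiset<int, SizeComparator> sizes{3, 1, 2};
  return sizes.count(2) == 1 ? 0 : 1;
}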
Used for type reference deduplication. -struct TypeReferenceValueComparator { - bool operator()(TypeReference tr1, TypeReference tr2) const { - // Note that we want to deduplicate identical boot image types even if they are - // referenced by different dex files, so we simply compare the descriptors. - StringReference sr1(tr1.dex_file, tr1.dex_file->GetTypeId(tr1.type_index).descriptor_idx_); - StringReference sr2(tr2.dex_file, tr2.dex_file->GetTypeId(tr2.type_index).descriptor_idx_); - return StringReferenceValueComparator()(sr1, sr2); - } -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_TYPE_REFERENCE_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 1736618363..bef32f8254 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1238,6 +1238,139 @@ void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst, src); } +void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x38); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3C); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEA); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEE); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x39); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDA); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDE); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3A); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3E); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3B); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); 
+ EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3F); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::minps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::maxps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::minpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst, src); +} void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index a747cda7bd..c4bb9ee18a 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -498,6 +498,25 @@ class X86Assembler FINAL : public Assembler { void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pavgw(XmmRegister dst, XmmRegister src); + void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxsb(XmmRegister dst, XmmRegister src); + void pminsw(XmmRegister dst, XmmRegister src); + void pmaxsw(XmmRegister dst, XmmRegister src); + void pminsd(XmmRegister dst, XmmRegister src); + void pmaxsd(XmmRegister dst, XmmRegister src); + + void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxub(XmmRegister dst, XmmRegister src); + void pminuw(XmmRegister dst, XmmRegister src); + void pmaxuw(XmmRegister dst, XmmRegister src); + void pminud(XmmRegister dst, XmmRegister src); + void pmaxud(XmmRegister dst, XmmRegister src); + + void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void maxps(XmmRegister dst, XmmRegister src); + void minpd(XmmRegister dst, XmmRegister src); + void maxpd(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index f75f972265..34f2a47c27 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -613,6 +613,70 @@ TEST_F(AssemblerX86Test, PAvgW) { DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); } +TEST_F(AssemblerX86Test, PMinSB) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb"); +} + +TEST_F(AssemblerX86Test, PMaxSB) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb"); +} + +TEST_F(AssemblerX86Test, PMinSW) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw"); +} + +TEST_F(AssemblerX86Test, PMaxSW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw"); +} + +TEST_F(AssemblerX86Test, PMinSD) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd"); +} + +TEST_F(AssemblerX86Test, PMaxSD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, 
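  // Roughly speaking, RepeatFF expands the format string for every pair of
  // XMM registers and DriverStr then checks the bytes produced by
  // X86Assembler against a reference assembler, so each new packed min/max
  // instruction gets full register coverage from a one-line test.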
"pmaxsd %{reg2}, %{reg1}"), "pmaxsd"); +} + +TEST_F(AssemblerX86Test, PMinUB) { + DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub"); +} + +TEST_F(AssemblerX86Test, PMaxUB) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub"); +} + +TEST_F(AssemblerX86Test, PMinUW) { + DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw"); +} + +TEST_F(AssemblerX86Test, PMaxUW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw"); +} + +TEST_F(AssemblerX86Test, PMinUD) { + DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud"); +} + +TEST_F(AssemblerX86Test, PMaxUD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud"); +} + +TEST_F(AssemblerX86Test, MinPS) { + DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps"); +} + +TEST_F(AssemblerX86Test, MaxPS) { + DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps"); +} + +TEST_F(AssemblerX86Test, MinPD) { + DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd"); +} + +TEST_F(AssemblerX86Test, MaxPD) { + DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd"); +} + TEST_F(AssemblerX86Test, PCmpeqB) { DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 1b7a4850db..82d1174a25 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1445,6 +1445,156 @@ void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x38); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x39); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + 
EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3A); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3E); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3B); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 0ddc46ca44..6e584fece1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -526,6 +526,25 @@ class X86_64Assembler FINAL : public Assembler { void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pavgw(XmmRegister dst, XmmRegister src); + void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxsb(XmmRegister dst, XmmRegister src); + void pminsw(XmmRegister dst, XmmRegister src); + void pmaxsw(XmmRegister dst, XmmRegister src); + void pminsd(XmmRegister dst, XmmRegister src); + void pmaxsd(XmmRegister dst, XmmRegister src); + + void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxub(XmmRegister dst, XmmRegister src); + void pminuw(XmmRegister dst, XmmRegister src); + void 
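  // As in the 32-bit header above, the new declarations fall into three
  // groups: signed integer min/max, unsigned integer min/max, and packed
  // floating-point min/max; "no addr variant (for now)" means only the
  // register-register forms are exposed.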
pmaxuw(XmmRegister dst, XmmRegister src); + void pminud(XmmRegister dst, XmmRegister src); + void pmaxud(XmmRegister dst, XmmRegister src); + + void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void maxps(XmmRegister dst, XmmRegister src); + void minpd(XmmRegister dst, XmmRegister src); + void maxpd(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index e7d8401e29..b57400334c 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1301,6 +1301,70 @@ TEST_F(AssemblerX86_64Test, Pavgw) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); } +TEST_F(AssemblerX86_64Test, Pminsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb"); +} + +TEST_F(AssemblerX86_64Test, Pminsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw"); +} + +TEST_F(AssemblerX86_64Test, Pminsd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd"); +} + +TEST_F(AssemblerX86_64Test, Pminub) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub"); +} + +TEST_F(AssemblerX86_64Test, Pmaxub) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub"); +} + +TEST_F(AssemblerX86_64Test, Pminuw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw"); +} + +TEST_F(AssemblerX86_64Test, Pmaxuw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw"); +} + +TEST_F(AssemblerX86_64Test, Pminud) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud"); +} + +TEST_F(AssemblerX86_64Test, Pmaxud) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud"); +} + +TEST_F(AssemblerX86_64Test, Minps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps"); +} + +TEST_F(AssemblerX86_64Test, Maxps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps"); +} + +TEST_F(AssemblerX86_64Test, Minpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd"); +} + +TEST_F(AssemblerX86_64Test, Maxpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd"); +} + TEST_F(AssemblerX86_64Test, PCmpeqb) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb"); } diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 4d55eb08b2..dd09fed06e 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -25,8 +25,8 @@ #include "dex/verified_method.h" #include "dex_file.h" #include "dex_file_types.h" +#include 
"driver/compiler_driver-inl.h" #include "driver/compiler_options.h" -#include "driver/compiler_driver.h" #include "handle_scope-inl.h" #include "indenter.h" #include "mirror/class_loader.h" |