diff options
Diffstat (limited to 'compiler')
76 files changed, 3331 insertions, 1386 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index 307a42cbba..a1269dcaf9 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -110,7 +110,6 @@ art_cc_defaults { "optimizing/code_generator_vector_arm.cc", "optimizing/code_generator_arm_vixl.cc", "optimizing/code_generator_vector_arm_vixl.cc", - "optimizing/dex_cache_array_fixups_arm.cc", "optimizing/instruction_simplifier_arm.cc", "optimizing/instruction_simplifier_shared.cc", "optimizing/intrinsics_arm.cc", @@ -145,7 +144,6 @@ art_cc_defaults { "linker/mips/relative_patcher_mips.cc", "optimizing/code_generator_mips.cc", "optimizing/code_generator_vector_mips.cc", - "optimizing/dex_cache_array_fixups_mips.cc", "optimizing/intrinsics_mips.cc", "optimizing/pc_relative_fixups_mips.cc", "utils/mips/assembler_mips.cc", @@ -342,6 +340,7 @@ art_cc_test { "image_test.cc", "image_write_read_test.cc", "jni/jni_compiler_test.cc", + "linker/method_bss_mapping_encoder_test.cc", "linker/multi_oat_relative_patcher_test.cc", "linker/output_stream_test.cc", "oat_test.cc", diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index a1ee68faeb..3683695a1b 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -19,6 +19,7 @@ #include "arch/instruction_set_features.h" #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/callee_save_type.h" #include "base/enums.h" #include "class_linker.h" #include "compiled_method.h" @@ -166,8 +167,8 @@ void CommonCompilerTest::SetUp() { instruction_set_features_ = InstructionSetFeatures::FromCppDefines(); runtime_->SetInstructionSet(instruction_set); - for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) { - Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i); + for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); ++i) { + CalleeSaveType type = CalleeSaveType(i); if (!runtime_->HasCalleeSaveMethod(type)) { 
runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(), type); } diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 0ca23a5c50..761e9e19a8 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -120,13 +120,13 @@ class LinkerPatch { // patch_type_ as an uintN_t and do explicit static_cast<>s. enum class Type : uint8_t { kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. + kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent. kCall, kCallRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. kStringRelative, // NOTE: Actual patching is instruction_set-dependent. kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. 
}; @@ -140,6 +140,16 @@ class LinkerPatch { return patch; } + static LinkerPatch MethodBssEntryPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t target_method_idx) { + LinkerPatch patch(literal_offset, Type::kMethodBssEntry, target_dex_file); + patch.method_idx_ = target_method_idx; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch CodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { @@ -196,16 +206,6 @@ class LinkerPatch { return patch; } - static LinkerPatch DexCacheArrayPatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t element_offset) { - LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file); - patch.pc_insn_offset_ = pc_insn_offset; - patch.element_offset_ = element_offset; - return patch; - } - static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) { @@ -229,12 +229,12 @@ class LinkerPatch { bool IsPcRelative() const { switch (GetType()) { case Type::kMethodRelative: + case Type::kMethodBssEntry: case Type::kCallRelative: case Type::kTypeRelative: case Type::kTypeBssEntry: case Type::kStringRelative: case Type::kStringBssEntry: - case Type::kDexCacheArray: case Type::kBakerReadBarrierBranch: return true; default: @@ -244,6 +244,7 @@ class LinkerPatch { MethodReference TargetMethod() const { DCHECK(patch_type_ == Type::kMethodRelative || + patch_type_ == Type::kMethodBssEntry || patch_type_ == Type::kCall || patch_type_ == Type::kCallRelative); return MethodReference(target_dex_file_, method_idx_); @@ -273,23 +274,13 @@ class LinkerPatch { return dex::StringIndex(string_idx_); } - const DexFile* TargetDexCacheDexFile() const { - DCHECK(patch_type_ == Type::kDexCacheArray); - return target_dex_file_; - } - - size_t TargetDexCacheElementOffset() const { - DCHECK(patch_type_ == Type::kDexCacheArray); 
- return element_offset_; - } - uint32_t PcInsnOffset() const { DCHECK(patch_type_ == Type::kMethodRelative || + patch_type_ == Type::kMethodBssEntry || patch_type_ == Type::kTypeRelative || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringBssEntry || - patch_type_ == Type::kDexCacheArray); + patch_type_ == Type::kStringBssEntry); return pc_insn_offset_; } @@ -324,12 +315,10 @@ class LinkerPatch { uint32_t method_idx_; // Method index for Call/Method patches. uint32_t type_idx_; // Type index for Type patches. uint32_t string_idx_; // String index for String patches. - uint32_t element_offset_; // Element offset in the dex cache arrays. uint32_t baker_custom_value1_; static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators"); - static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators"); }; union { diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc index 72b2282ade..f4a72cf2cc 100644 --- a/compiler/compiled_method_test.cc +++ b/compiler/compiled_method_test.cc @@ -58,6 +58,14 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u), LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u), LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1001u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3001u, 1001u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3000u, 1000u), + 
LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3001u, 1001u), LinkerPatch::CodePatch(16u, dex_file1, 1000u), LinkerPatch::CodePatch(16u, dex_file1, 1001u), LinkerPatch::CodePatch(16u, dex_file2, 1000u), @@ -98,14 +106,6 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1000u), LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1001u), LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1001u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2001u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2000u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u), - LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u), LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 0u), LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 1u), LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u), @@ -119,6 +119,14 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1000u), LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1001u), LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1001u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3001u, 1000u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3000u, 1001u), 
+ LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3001u, 1001u), LinkerPatch::CodePatch(32u, dex_file1, 1000u), LinkerPatch::CodePatch(32u, dex_file1, 1001u), LinkerPatch::CodePatch(32u, dex_file2, 1000u), @@ -159,14 +167,6 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1000u), LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1001u), LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1001u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2001u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2000u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u), - LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u), LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 0u), LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 1u), LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u), diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc index b87cb61ed6..04ceca0513 100644 --- a/compiler/dex/verification_results.cc +++ b/compiler/dex/verification_results.cc @@ -110,12 +110,12 @@ void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) { // This method should only be called for classes verified at compile time, // which have no verifier error, nor has methods that we know will throw // at runtime. - atomic_verified_methods_.Insert( - ref, - /*expected*/ nullptr, - new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false)); - // We don't check the result of `Insert` as we could insert twice for the same - // MethodReference in the presence of duplicate methods. 
+ std::unique_ptr<VerifiedMethod> verified_method = std::make_unique<VerifiedMethod>( + /* encountered_error_types */ 0, /* has_runtime_throw */ false); + if (atomic_verified_methods_.Insert(ref, /*expected*/ nullptr, verified_method.get()) == + AtomicMap::InsertResult::kInsertResultSuccess) { + verified_method.release(); + } } void VerificationResults::AddRejectedClass(ClassReference ref) { diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 93f678c64a..0d0769fe98 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1000,8 +1000,9 @@ bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_r if (profile_compilation_info_ == nullptr) { return false; } - // TODO: Revisit compiling all startup methods. b/36457259 - bool result = profile_compilation_info_->IsStartupOrHotMethod(method_ref); + // Compile only hot methods, it is the profile saver's job to decide what startup methods to mark + // as hot. + bool result = profile_compilation_info_->ContainsHotMethod(method_ref); if (kDebugProfileGuidedCompilation) { LOG(INFO) << "[ProfileGuidedCompilation] " @@ -2292,18 +2293,9 @@ class InitializeClassVisitor : public CompilationVisitor { ObjectLock<mirror::Class> lock(soa.Self(), h_klass); // Attempt to initialize allowing initialization of parent classes but still not static // fields. - bool is_superclass_initialized = true; - if (!manager_->GetCompiler()->GetCompilerOptions().IsAppImage()) { - // If not an app image case, the compiler won't initialize too much things and do a fast - // fail, don't check dependencies. + bool is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self()); + if (is_superclass_initialized) { manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); - } else { - // For app images, do the initialization recursively and resolve types encountered to make - // sure the compiler runs without error. 
- is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self()); - if (is_superclass_initialized) { - manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); - } } old_status = klass->GetStatus(); // If superclass cannot be initialized, no need to proceed. @@ -2435,9 +2427,33 @@ class InitializeClassVisitor : public CompilationVisitor { } } + bool NoPotentialInternStrings(Handle<mirror::Class> klass, + Handle<mirror::ClassLoader>* class_loader) + REQUIRES_SHARED(Locks::mutator_lock_) { + StackHandleScope<1> hs(Thread::Current()); + Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache()); + const DexFile* dex_file = h_dex_cache->GetDexFile(); + const DexFile::ClassDef* class_def = klass->GetClassDef(); + annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file, + &h_dex_cache, + class_loader, + manager_->GetClassLinker(), + *class_def); + + const auto jString = annotations::RuntimeEncodedStaticFieldValueIterator::kString; + for ( ; value_it.HasNext(); value_it.Next()) { + if (value_it.GetValueType() == jString) { + // We don't want cache the static encoded strings which is a potential intern. + return false; + } + } + + return true; + } + bool ResolveTypesOfMethods(Thread* self, ArtMethod* m) REQUIRES_SHARED(Locks::mutator_lock_) { - auto rtn_type = m->GetReturnType(true); + auto rtn_type = m->GetReturnType(true); // return value is discarded because resolve will be done internally. 
if (rtn_type == nullptr) { self->ClearException(); return false; @@ -2548,8 +2564,9 @@ class InitializeClassVisitor : public CompilationVisitor { ObjPtr<mirror::Class> super_class = klass->GetSuperClass(); StackHandleScope<1> hs(self); Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class)); - if (!NoClinitInDependency(handle_scope_super, self, class_loader)) + if (!NoClinitInDependency(handle_scope_super, self, class_loader)) { return false; + } } uint32_t num_if = klass->NumDirectInterfaces(); @@ -2558,11 +2575,12 @@ class InitializeClassVisitor : public CompilationVisitor { interface = mirror::Class::GetDirectInterface(self, klass.Get(), i); StackHandleScope<1> hs(self); Handle<mirror::Class> handle_interface(hs.NewHandle(interface)); - if (!NoClinitInDependency(handle_interface, self, class_loader)) + if (!NoClinitInDependency(handle_interface, self, class_loader)) { return false; + } } - return true; + return NoPotentialInternStrings(klass, class_loader); } const ParallelCompilationManager* const manager_; diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 7c02384ff2..2ef9fa1ccb 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -670,6 +670,7 @@ class ElfBuilder FINAL { Elf_Word rodata_size, Elf_Word text_size, Elf_Word bss_size, + Elf_Word bss_methods_offset, Elf_Word bss_roots_offset) { std::string soname(elf_file_path); size_t directory_separator_pos = soname.rfind('/'); @@ -715,9 +716,18 @@ class ElfBuilder FINAL { Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 1u : 0u); Elf_Word oatbss = dynstr_.Add("oatbss"); dynsym_.Add(oatbss, bss_index, bss_address, bss_roots_offset, STB_GLOBAL, STT_OBJECT); + DCHECK_LE(bss_methods_offset, bss_roots_offset); + DCHECK_LE(bss_roots_offset, bss_size); + // Add a symbol marking the start of the methods part of the .bss, if not empty. 
+ if (bss_methods_offset != bss_roots_offset) { + Elf_Word bss_methods_address = bss_address + bss_methods_offset; + Elf_Word bss_methods_size = bss_roots_offset - bss_methods_offset; + Elf_Word oatbssroots = dynstr_.Add("oatbssmethods"); + dynsym_.Add( + oatbssroots, bss_index, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT); + } // Add a symbol marking the start of the GC roots part of the .bss, if not empty. if (bss_roots_offset != bss_size) { - DCHECK_LT(bss_roots_offset, bss_size); Elf_Word bss_roots_address = bss_address + bss_roots_offset; Elf_Word bss_roots_size = bss_size - bss_roots_offset; Elf_Word oatbssroots = dynstr_.Add("oatbssroots"); diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h index 7baae527ff..a8a5bc32b7 100644 --- a/compiler/elf_writer.h +++ b/compiler/elf_writer.h @@ -55,6 +55,7 @@ class ElfWriter { virtual void PrepareDynamicSection(size_t rodata_size, size_t text_size, size_t bss_size, + size_t bss_methods_offset, size_t bss_roots_offset) = 0; virtual void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0; virtual OutputStream* StartRoData() = 0; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 738f5a2b29..5d6dd2e1d7 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -22,7 +22,6 @@ #include "base/casts.h" #include "base/logging.h" -#include "base/stl_util.h" #include "compiled_method.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" @@ -80,7 +79,7 @@ class DebugInfoTask : public Task { const InstructionSetFeatures* instruction_set_features_; size_t rodata_section_size_; size_t text_section_size_; - const ArrayRef<const debug::MethodDebugInfo>& method_infos_; + const ArrayRef<const debug::MethodDebugInfo> method_infos_; std::vector<uint8_t> result_; }; @@ -97,6 +96,7 @@ class ElfWriterQuick FINAL : public ElfWriter { void PrepareDynamicSection(size_t rodata_size, size_t text_size, size_t bss_size, + 
size_t bss_methods_offset, size_t bss_roots_offset) OVERRIDE; void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE; OutputStream* StartRoData() OVERRIDE; @@ -136,15 +136,15 @@ std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set, const CompilerOptions* compiler_options, File* elf_file) { if (Is64BitInstructionSet(instruction_set)) { - return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, - features, - compiler_options, - elf_file); + return std::make_unique<ElfWriterQuick<ElfTypes64>>(instruction_set, + features, + compiler_options, + elf_file); } else { - return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, - features, - compiler_options, - elf_file); + return std::make_unique<ElfWriterQuick<ElfTypes32>>(instruction_set, + features, + compiler_options, + elf_file); } } @@ -160,7 +160,8 @@ ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set, rodata_size_(0u), text_size_(0u), bss_size_(0u), - output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))), + output_stream_( + std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(elf_file))), builder_(new ElfBuilder<ElfTypes>(instruction_set, features, output_stream_.get())) {} template <typename ElfTypes> @@ -178,6 +179,7 @@ template <typename ElfTypes> void ElfWriterQuick<ElfTypes>::PrepareDynamicSection(size_t rodata_size, size_t text_size, size_t bss_size, + size_t bss_methods_offset, size_t bss_roots_offset) { DCHECK_EQ(rodata_size_, 0u); rodata_size_ = rodata_size; @@ -189,6 +191,7 @@ void ElfWriterQuick<ElfTypes>::PrepareDynamicSection(size_t rodata_size, rodata_size_, text_size_, bss_size_, + bss_methods_offset, bss_roots_offset); } diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index dc880b089e..b4777df0df 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -17,6 +17,7 @@ #include <memory> #include 
"base/arena_allocator.h" +#include "base/callee_save_type.h" #include "base/enums.h" #include "class_linker.h" #include "common_runtime_test.h" @@ -170,7 +171,7 @@ TEST_F(ExceptionTest, StackTraceElement) { Runtime* r = Runtime::Current(); r->SetInstructionSet(kRuntimeISA); ArtMethod* save_method = r->CreateCalleeSaveMethod(); - r->SetCalleeSaveMethod(save_method, Runtime::kSaveAllCalleeSaves); + r->SetCalleeSaveMethod(save_method, CalleeSaveType::kSaveAllCalleeSaves); QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method); ASSERT_EQ(kStackAlignment, 16U); diff --git a/compiler/image_test.h b/compiler/image_test.h index 2f15ff4815..3d89757d51 100644 --- a/compiler/image_test.h +++ b/compiler/image_test.h @@ -290,9 +290,9 @@ inline void CompilationHelper::Compile(CompilerDriver* driver, if (kIsVdexEnabled) { for (size_t i = 0, size = vdex_files.size(); i != size; ++i) { - std::unique_ptr<BufferedOutputStream> vdex_out( - MakeUnique<BufferedOutputStream>( - MakeUnique<FileOutputStream>(vdex_files[i].GetFile()))); + std::unique_ptr<BufferedOutputStream> vdex_out = + std::make_unique<BufferedOutputStream>( + std::make_unique<FileOutputStream>(vdex_files[i].GetFile())); oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr); oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get()); } @@ -311,6 +311,7 @@ inline void CompilationHelper::Compile(CompilerDriver* driver, elf_writer->PrepareDynamicSection(rodata_size, text_size, oat_writer->GetBssSize(), + oat_writer->GetBssMethodsOffset(), oat_writer->GetBssRootsOffset()); writer->UpdateOatFileLayout(i, diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 4d6db4745f..406892e499 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -27,6 +27,8 @@ #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/callee_save_type.h" +#include "base/enums.h" #include "base/logging.h" #include "base/unix_file/fd_file.h" #include "class_linker-inl.h" @@ -47,7 
+49,6 @@ #include "globals.h" #include "image.h" #include "imt_conflict_table.h" -#include "intern_table.h" #include "jni_internal.h" #include "linear_alloc.h" #include "lock_word.h" @@ -1572,13 +1573,13 @@ void ImageWriter::CalculateNewObjectOffsets() { image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod(); image_methods_[ImageHeader::kImtUnimplementedMethod] = runtime->GetImtUnimplementedMethod(); image_methods_[ImageHeader::kSaveAllCalleeSavesMethod] = - runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves); + runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveAllCalleeSaves); image_methods_[ImageHeader::kSaveRefsOnlyMethod] = - runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly); + runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsOnly); image_methods_[ImageHeader::kSaveRefsAndArgsMethod] = - runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs); + runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs); image_methods_[ImageHeader::kSaveEverythingMethod] = - runtime->GetCalleeSaveMethod(Runtime::kSaveEverything); + runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything); // Visit image methods first to have the main runtime methods in the first image. 
for (auto* m : image_methods_) { CHECK(m != nullptr); @@ -2482,8 +2483,8 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_); } else { bool found_one = false; - for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) { - auto idx = static_cast<Runtime::CalleeSaveType>(i); + for (size_t i = 0; i < static_cast<size_t>(CalleeSaveType::kLastCalleeSaveType); ++i) { + auto idx = static_cast<CalleeSaveType>(i); if (runtime->HasCalleeSaveMethod(idx) && runtime->GetCalleeSaveMethod(idx) == orig) { found_one = true; break; diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 2283b39773..5e2db7d8f7 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -36,6 +36,7 @@ #include "class_table.h" #include "driver/compiler_driver.h" #include "image.h" +#include "intern_table.h" #include "lock_word.h" #include "mem_map.h" #include "mirror/dex_cache.h" @@ -106,19 +107,6 @@ class ImageWriter FINAL { ArtMethod* GetImageMethodAddress(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_); - template <typename PtrType> - PtrType GetDexCacheArrayElementImageAddress(const DexFile* dex_file, uint32_t offset) - const REQUIRES_SHARED(Locks::mutator_lock_) { - auto oat_it = dex_file_oat_index_map_.find(dex_file); - DCHECK(oat_it != dex_file_oat_index_map_.end()); - const ImageInfo& image_info = GetImageInfo(oat_it->second); - auto it = image_info.dex_cache_array_starts_.find(dex_file); - DCHECK(it != image_info.dex_cache_array_starts_.end()); - return reinterpret_cast<PtrType>( - image_info.image_begin_ + image_info.bin_slot_offsets_[kBinDexCacheArray] + - it->second + offset); - } - size_t GetOatFileOffset(size_t oat_index) const { return GetImageInfo(oat_index).oat_offset_; } diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index fed1f48d65..66135414f7 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -189,18 
+189,12 @@ JitCompiler::~JitCompiler() { bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) { DCHECK(!method->IsProxyMethod()); + DCHECK(method->GetDeclaringClass()->IsResolved()); + TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit)); - StackHandleScope<2> hs(self); self->AssertNoPendingException(); Runtime* runtime = Runtime::Current(); - // Ensure the class is initialized. - Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass())); - if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) { - VLOG(jit) << "JIT failed to initialize " << method->PrettyMethod(); - return false; - } - // Do the compilation. bool success = false; { diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index c1ac230d43..18ff1c9bb6 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -16,6 +16,7 @@ #include "linker/arm/relative_patcher_arm_base.h" +#include "base/stl_util.h" #include "compiled_method.h" #include "linker/output_stream.h" #include "oat.h" diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 117684a66b..bc21607c5b 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -59,11 +59,11 @@ inline bool IsAdrpPatch(const LinkerPatch& patch) { case LinkerPatch::Type::kBakerReadBarrierBranch: return false; case LinkerPatch::Type::kMethodRelative: + case LinkerPatch::Type::kMethodBssEntry: case LinkerPatch::Type::kTypeRelative: case LinkerPatch::Type::kTypeBssEntry: case LinkerPatch::Type::kStringRelative: case LinkerPatch::Type::kStringBssEntry: - case LinkerPatch::Type::kDexCacheArray: return patch.LiteralOffset() == patch.PcInsnOffset(); } } @@ -251,20 +251,20 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, // ADD 
immediate, 64-bit with imm12 == 0 (unset). if (!kEmitCompilerReadBarrier) { DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || - patch.GetType() == LinkerPatch::Type::kStringRelative || - patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType(); + patch.GetType() == LinkerPatch::Type::kTypeRelative || + patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType(); } else { // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry. DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || - patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative || - patch.GetType() == LinkerPatch::Type::kStringBssEntry || - patch.GetType() == LinkerPatch::Type::kTypeBssEntry) << patch.GetType(); + patch.GetType() == LinkerPatch::Type::kStringRelative || + patch.GetType() == LinkerPatch::Type::kTypeBssEntry || + patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType(); } shift = 0u; // No shift for ADD. } else { // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset). - DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray || + DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry || patch.GetType() == LinkerPatch::Type::kTypeBssEntry || patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType(); DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn; diff --git a/compiler/linker/method_bss_mapping_encoder.h b/compiler/linker/method_bss_mapping_encoder.h new file mode 100644 index 0000000000..b2922ec6d2 --- /dev/null +++ b/compiler/linker/method_bss_mapping_encoder.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_METHOD_BSS_MAPPING_ENCODER_H_ +#define ART_COMPILER_LINKER_METHOD_BSS_MAPPING_ENCODER_H_ + +#include "base/enums.h" +#include "base/logging.h" +#include "dex_file.h" +#include "method_bss_mapping.h" + +namespace art { +namespace linker { + +// Helper class for encoding compressed MethodBssMapping. +class MethodBssMappingEncoder { + public: + explicit MethodBssMappingEncoder(PointerSize pointer_size) + : pointer_size_(static_cast<size_t>(pointer_size)) { + entry_.method_index = DexFile::kDexNoIndex16; + entry_.index_mask = 0u; + entry_.bss_offset = static_cast<uint32_t>(-1); + } + + // Try to merge the next method_index -> bss_offset mapping into the current entry. + // Return true on success, false on failure. + bool TryMerge(uint32_t method_index, uint32_t bss_offset) { + DCHECK_NE(method_index, entry_.method_index); + if (entry_.bss_offset + pointer_size_ != bss_offset) { + return false; + } + uint32_t diff = method_index - entry_.method_index; + if (diff > 16u) { + return false; + } + if ((entry_.index_mask & ~(static_cast<uint32_t>(-1) << diff)) != 0u) { + return false; + } + entry_.method_index = method_index; + // Insert the bit indicating the method index we've just overwritten + // and shift bits indicating method indexes before that. 
+ entry_.index_mask = dchecked_integral_cast<uint16_t>( + (static_cast<uint32_t>(entry_.index_mask) | 0x10000u) >> diff); + entry_.bss_offset = bss_offset; + return true; + } + + void Reset(uint32_t method_index, uint32_t bss_offset) { + entry_.method_index = method_index; + entry_.index_mask = 0u; + entry_.bss_offset = bss_offset; + } + + MethodBssMappingEntry GetEntry() { + return entry_; + } + + private: + size_t pointer_size_; + MethodBssMappingEntry entry_; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_METHOD_BSS_MAPPING_ENCODER_H_ diff --git a/compiler/linker/method_bss_mapping_encoder_test.cc b/compiler/linker/method_bss_mapping_encoder_test.cc new file mode 100644 index 0000000000..1240389bef --- /dev/null +++ b/compiler/linker/method_bss_mapping_encoder_test.cc @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "method_bss_mapping_encoder.h" + +#include "gtest/gtest.h" + +namespace art { +namespace linker { + +TEST(MethodBssMappingEncoder, TryMerge) { + for (PointerSize pointer_size : {PointerSize::k32, PointerSize::k64}) { + size_t raw_pointer_size = static_cast<size_t>(pointer_size); + MethodBssMappingEncoder encoder(pointer_size); + encoder.Reset(1u, 0u); + ASSERT_FALSE(encoder.TryMerge(5u, raw_pointer_size + 1)); // Wrong bss_offset difference. 
+ ASSERT_FALSE(encoder.TryMerge(18u, raw_pointer_size)); // Method index out of range. + ASSERT_TRUE(encoder.TryMerge(5u, raw_pointer_size)); + ASSERT_TRUE(encoder.GetEntry().CoversIndex(1u)); + ASSERT_TRUE(encoder.GetEntry().CoversIndex(5u)); + ASSERT_FALSE(encoder.GetEntry().CoversIndex(17u)); + ASSERT_FALSE(encoder.TryMerge(17u, 2 * raw_pointer_size + 1)); // Wrong bss_offset difference. + ASSERT_FALSE(encoder.TryMerge(18u, 2 * raw_pointer_size)); // Method index out of range. + ASSERT_TRUE(encoder.TryMerge(17u, 2 * raw_pointer_size)); + ASSERT_TRUE(encoder.GetEntry().CoversIndex(1u)); + ASSERT_TRUE(encoder.GetEntry().CoversIndex(5u)); + ASSERT_TRUE(encoder.GetEntry().CoversIndex(17u)); + ASSERT_EQ(0u, encoder.GetEntry().GetBssOffset(1u, raw_pointer_size)); + ASSERT_EQ(raw_pointer_size, encoder.GetEntry().GetBssOffset(5u, raw_pointer_size)); + ASSERT_EQ(2 * raw_pointer_size, encoder.GetEntry().GetBssOffset(17u, raw_pointer_size)); + ASSERT_EQ(0x0011u, encoder.GetEntry().index_mask); + ASSERT_FALSE(encoder.TryMerge(18u, 2 * raw_pointer_size)); // Method index out of range. 
+ } +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc index 8da530f7cc..d99d237a23 100644 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ b/compiler/linker/mips/relative_patcher_mips.cc @@ -50,7 +50,6 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, uint32_t anchor_literal_offset = patch.PcInsnOffset(); uint32_t literal_offset = patch.LiteralOffset(); uint32_t literal_low_offset; - bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray); // Perform basic sanity checks and initialize `literal_low_offset` to point // to the instruction containing the 16 least significant bits of the @@ -72,16 +71,8 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, DCHECK_GE(code->size(), 16u); DCHECK_LE(literal_offset, code->size() - 12u); DCHECK_GE(literal_offset, 4u); - // The NAL instruction may not precede immediately as the PC+0 value may - // come from HMipsComputeBaseMethodAddress. - if (dex_cache_array) { - DCHECK_EQ(literal_offset + 4u, anchor_literal_offset); - // NAL - DCHECK_EQ((*code)[literal_offset - 4], 0x00); - DCHECK_EQ((*code)[literal_offset - 3], 0x00); - DCHECK_EQ((*code)[literal_offset - 2], 0x10); - DCHECK_EQ((*code)[literal_offset - 1], 0x04); - } + // The NAL instruction does not precede immediately as the PC+0 + // comes from HMipsComputeBaseMethodAddress. // LUI reg, offset_high DCHECK_EQ((*code)[literal_offset + 0], 0x34); DCHECK_EQ((*code)[literal_offset + 1], 0x12); @@ -90,10 +81,6 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, // ADDU reg, reg, reg2 DCHECK_EQ((*code)[literal_offset + 4], 0x21); DCHECK_EQ(((*code)[literal_offset + 5] & 0x07), 0x00); - if (dex_cache_array) { - // reg2 is either RA or from HMipsComputeBaseMethodAddress. 
- DCHECK_EQ(((*code)[literal_offset + 6] & 0x1F), 0x1F); - } DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x00); // instr reg(s), offset_low DCHECK_EQ((*code)[literal_offset + 8], 0x78); @@ -104,9 +91,6 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, // Apply patch. uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; uint32_t diff = target_offset - anchor_offset; - if (dex_cache_array && !is_r6) { - diff += kDexCacheArrayLwOffset; - } diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". // LUI reg, offset_high / AUIPC reg, offset_high diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h index 852a345aa6..0b74bd33a4 100644 --- a/compiler/linker/mips/relative_patcher_mips.h +++ b/compiler/linker/mips/relative_patcher_mips.h @@ -46,9 +46,6 @@ class MipsRelativePatcher FINAL : public RelativePatcher { uint32_t patch_offset) OVERRIDE; private: - // We'll maximize the range of a single load instruction for dex cache array accesses - // by aligning offset -32768 with the offset of the first used element. - static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; bool is_r6; DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher); diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc index 961b31266f..49af7c614b 100644 --- a/compiler/linker/mips/relative_patcher_mips_test.cc +++ b/compiler/linker/mips/relative_patcher_mips_test.cc @@ -61,7 +61,6 @@ void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPa ASSERT_TRUE(result.first); uint32_t diff = target_offset - (result.second + kAnchorOffset); - CHECK_NE(patches[0].GetType(), LinkerPatch::Type::kDexCacheArray); diff += (diff & 0x8000) << 1; // Account for sign extension in addiu. 
const uint8_t expected_code[] = { diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc index 84c76f2c6c..09fef29d48 100644 --- a/compiler/linker/output_stream_test.cc +++ b/compiler/linker/output_stream_test.cc @@ -19,7 +19,6 @@ #include "base/unix_file/fd_file.h" #include "base/logging.h" -#include "base/stl_util.h" #include "buffered_output_stream.h" #include "common_runtime_test.h" @@ -79,7 +78,7 @@ TEST_F(OutputStreamTest, File) { TEST_F(OutputStreamTest, Buffered) { ScratchFile tmp; { - BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile())); + BufferedOutputStream buffered_output_stream(std::make_unique<FileOutputStream>(tmp.GetFile())); SetOutputStream(buffered_output_stream); GenerateTestOutput(); } @@ -125,7 +124,7 @@ TEST_F(OutputStreamTest, BufferedFlush) { bool flush_called; }; - std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>(); + std::unique_ptr<CheckingOutputStream> cos = std::make_unique<CheckingOutputStream>(); CheckingOutputStream* checking_output_stream = cos.get(); BufferedOutputStream buffered(std::move(cos)); ASSERT_FALSE(checking_output_stream->flush_called); diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 1578c0cd3e..55d0bd95d7 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -19,6 +19,7 @@ #include "arch/instruction_set_features.h" #include "art_method-inl.h" #include "base/enums.h" +#include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker.h" #include "common_compiler_test.h" @@ -220,11 +221,12 @@ class OatTest : public CommonCompilerTest { elf_writer->PrepareDynamicSection(rodata_size, text_size, oat_writer.GetBssSize(), + oat_writer.GetBssMethodsOffset(), oat_writer.GetBssRootsOffset()); if (kIsVdexEnabled) { - std::unique_ptr<BufferedOutputStream> vdex_out( - MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file))); + std::unique_ptr<BufferedOutputStream> vdex_out 
= + std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file)); if (!oat_writer.WriteVerifierDeps(vdex_out.get(), nullptr)) { return false; } @@ -483,7 +485,7 @@ TEST_F(OatTest, WriteRead) { TEST_F(OatTest, OatHeaderSizeCheck) { // If this test is failing and you have to update these constants, // it is time to update OatHeader::kOatVersion - EXPECT_EQ(72U, sizeof(OatHeader)); + EXPECT_EQ(76U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(24U, sizeof(OatQuickMethodHeader)); EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index fed2d34cdb..59daf5a09e 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -22,7 +22,7 @@ #include "arch/arm64/instruction_set_features_arm64.h" #include "art_method-inl.h" #include "base/allocator.h" -#include "base/bit_vector.h" +#include "base/bit_vector-inl.h" #include "base/enums.h" #include "base/file_magic.h" #include "base/stl_util.h" @@ -41,6 +41,7 @@ #include "image_writer.h" #include "linker/buffered_output_stream.h" #include "linker/file_output_stream.h" +#include "linker/method_bss_mapping_encoder.h" #include "linker/multi_oat_relative_patcher.h" #include "linker/output_stream.h" #include "mirror/array.h" @@ -230,12 +231,14 @@ class OatWriter::OatDexFile { return dex_file_location_data_; } - void ReserveClassOffsets(OatWriter* oat_writer); - size_t SizeOf() const; bool Write(OatWriter* oat_writer, OutputStream* out) const; bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out); + size_t GetClassOffsetsRawSize() const { + return class_offsets_.size() * sizeof(class_offsets_[0]); + } + // The source of the dex file. DexFileSource source_; @@ -256,15 +259,12 @@ class OatWriter::OatDexFile { uint32_t dex_file_offset_; uint32_t class_offsets_offset_; uint32_t lookup_table_offset_; + uint32_t method_bss_mapping_offset_; // Data to write to a separate section. 
dchecked_vector<uint32_t> class_offsets_; private: - size_t GetClassOffsetsRawSize() const { - return class_offsets_.size() * sizeof(class_offsets_[0]); - } - DISALLOW_COPY_AND_ASSIGN(OatDexFile); }; @@ -294,7 +294,10 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo oat_size_(0u), bss_start_(0u), bss_size_(0u), + bss_methods_offset_(0u), bss_roots_offset_(0u), + bss_method_entry_references_(), + bss_method_entries_(), bss_type_entries_(), bss_string_entries_(), oat_data_offset_(0u), @@ -331,6 +334,7 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo size_oat_dex_file_offset_(0), size_oat_dex_file_class_offsets_offset_(0), size_oat_dex_file_lookup_table_offset_(0), + size_oat_dex_file_method_bss_mapping_offset_(0), size_oat_lookup_table_alignment_(0), size_oat_lookup_table_(0), size_oat_class_offsets_alignment_(0), @@ -339,6 +343,7 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo size_oat_class_status_(0), size_oat_class_method_bitmaps_(0), size_oat_class_method_offsets_(0), + size_method_bss_mappings_(0u), relative_patcher_(nullptr), absolute_patch_locations_(), profile_compilation_info_(info) { @@ -502,17 +507,16 @@ bool OatWriter::WriteAndOpenDexFiles( // Reserve space for Vdex header and checksums. 
vdex_size_ = sizeof(VdexFile::Header) + oat_dex_files_.size() * sizeof(VdexFile::VdexChecksum); } - size_t oat_data_offset = InitOatHeader(instruction_set, - instruction_set_features, - dchecked_integral_cast<uint32_t>(oat_dex_files_.size()), - key_value_store); - oat_size_ = InitOatDexFiles(oat_data_offset); + oat_size_ = InitOatHeader(instruction_set, + instruction_set_features, + dchecked_integral_cast<uint32_t>(oat_dex_files_.size()), + key_value_store); ChecksumUpdatingOutputStream checksum_updating_rodata(oat_rodata, oat_header_.get()); if (kIsVdexEnabled) { - std::unique_ptr<BufferedOutputStream> vdex_out( - MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file))); + std::unique_ptr<BufferedOutputStream> vdex_out = + std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file)); // Write DEX files into VDEX, mmap and open them. if (!WriteDexFiles(vdex_out.get(), vdex_file, update_input_vdex) || !OpenDexFiles(vdex_file, verify, &dex_files_map, &dex_files)) { @@ -539,16 +543,6 @@ bool OatWriter::WriteAndOpenDexFiles( return false; } - // Reserve space for class offsets in OAT and update class_offsets_offset_. - for (OatDexFile& oat_dex_file : oat_dex_files_) { - oat_dex_file.ReserveClassOffsets(this); - } - - // Write OatDexFiles into OAT. Needs to be done last, once offsets are collected. 
- if (!WriteOatDexFiles(&checksum_updating_rodata)) { - return false; - } - *opened_dex_files_map = std::move(dex_files_map); *opened_dex_files = std::move(dex_files); write_state_ = WriteState::kPrepareLayout; @@ -567,16 +561,34 @@ void OatWriter::PrepareLayout(linker::MultiOatRelativePatcher* relative_patcher) InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); CHECK_EQ(instruction_set, oat_header_->GetInstructionSet()); + { + TimingLogger::ScopedTiming split("InitBssLayout", timings_); + InitBssLayout(instruction_set); + } + uint32_t offset = oat_size_; { + TimingLogger::ScopedTiming split("InitClassOffsets", timings_); + offset = InitClassOffsets(offset); + } + { TimingLogger::ScopedTiming split("InitOatClasses", timings_); offset = InitOatClasses(offset); } { + TimingLogger::ScopedTiming split("InitMethodBssMappings", timings_); + offset = InitMethodBssMappings(offset); + } + { TimingLogger::ScopedTiming split("InitOatMaps", timings_); offset = InitOatMaps(offset); } { + TimingLogger::ScopedTiming split("InitOatDexFiles", timings_); + oat_header_->SetOatDexFilesOffset(offset); + offset = InitOatDexFiles(offset); + } + { TimingLogger::ScopedTiming split("InitOatCode", timings_); offset = InitOatCode(offset); } @@ -585,11 +597,7 @@ void OatWriter::PrepareLayout(linker::MultiOatRelativePatcher* relative_patcher) offset = InitOatCodeDexFiles(offset); } oat_size_ = offset; - - { - TimingLogger::ScopedTiming split("InitBssLayout", timings_); - InitBssLayout(instruction_set); - } + bss_start_ = (bss_size_ != 0u) ? 
RoundUp(oat_size_, kPageSize) : 0u; CHECK_EQ(dex_files_->size(), oat_dex_files_.size()); if (compiling_boot_image_) { @@ -606,11 +614,10 @@ OatWriter::~OatWriter() { class OatWriter::DexMethodVisitor { public: DexMethodVisitor(OatWriter* writer, size_t offset) - : writer_(writer), - offset_(offset), - dex_file_(nullptr), - class_def_index_(DexFile::kDexNoIndex) { - } + : writer_(writer), + offset_(offset), + dex_file_(nullptr), + class_def_index_(DexFile::kDexNoIndex) {} virtual bool StartClass(const DexFile* dex_file, size_t class_def_index) { DCHECK(dex_file_ == nullptr); @@ -650,19 +657,18 @@ class OatWriter::DexMethodVisitor { class OatWriter::OatDexMethodVisitor : public DexMethodVisitor { public: OatDexMethodVisitor(OatWriter* writer, size_t offset) - : DexMethodVisitor(writer, offset), - oat_class_index_(0u), - method_offsets_index_(0u) { - } + : DexMethodVisitor(writer, offset), + oat_class_index_(0u), + method_offsets_index_(0u) {} - bool StartClass(const DexFile* dex_file, size_t class_def_index) { + bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE { DexMethodVisitor::StartClass(dex_file, class_def_index); DCHECK_LT(oat_class_index_, writer_->oat_classes_.size()); method_offsets_index_ = 0u; return true; } - bool EndClass() { + bool EndClass() OVERRIDE { ++oat_class_index_; return DexMethodVisitor::EndClass(); } @@ -672,21 +678,61 @@ class OatWriter::OatDexMethodVisitor : public DexMethodVisitor { size_t method_offsets_index_; }; +class OatWriter::InitBssLayoutMethodVisitor : public DexMethodVisitor { + public: + explicit InitBssLayoutMethodVisitor(OatWriter* writer) + : DexMethodVisitor(writer, /* offset */ 0u) {} + + bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED, + const ClassDataItemIterator& it) OVERRIDE { + // Look for patches with .bss references and prepare maps with placeholders for their offsets. 
+ CompiledMethod* compiled_method = writer_->compiler_driver_->GetCompiledMethod( + MethodReference(dex_file_, it.GetMemberIndex())); + if (compiled_method != nullptr) { + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.GetType() == LinkerPatch::Type::kMethodBssEntry) { + MethodReference target_method = patch.TargetMethod(); + auto refs_it = writer_->bss_method_entry_references_.find(target_method.dex_file); + if (refs_it == writer_->bss_method_entry_references_.end()) { + refs_it = writer_->bss_method_entry_references_.Put( + target_method.dex_file, + BitVector(target_method.dex_file->NumMethodIds(), + /* expandable */ false, + Allocator::GetMallocAllocator())); + refs_it->second.ClearAllBits(); + } + refs_it->second.SetBit(target_method.dex_method_index); + writer_->bss_method_entries_.Overwrite(target_method, /* placeholder */ 0u); + } else if (patch.GetType() == LinkerPatch::Type::kTypeBssEntry) { + TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex()); + writer_->bss_type_entries_.Overwrite(ref, /* placeholder */ 0u); + } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { + StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); + writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u); + } + } + } + return true; + } +}; + class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor { public: InitOatClassesMethodVisitor(OatWriter* writer, size_t offset) - : DexMethodVisitor(writer, offset), - compiled_methods_(), - num_non_null_compiled_methods_(0u) { + : DexMethodVisitor(writer, offset), + compiled_methods_(), + num_non_null_compiled_methods_(0u) { size_t num_classes = 0u; for (const OatDexFile& oat_dex_file : writer_->oat_dex_files_) { num_classes += oat_dex_file.class_offsets_.size(); } writer_->oat_classes_.reserve(num_classes); compiled_methods_.reserve(256u); + // If there are any classes, the class offsets allocation aligns the offset. 
+ DCHECK(num_classes == 0u || IsAligned<4u>(offset)); } - bool StartClass(const DexFile* dex_file, size_t class_def_index) { + bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE { DexMethodVisitor::StartClass(dex_file, class_def_index); compiled_methods_.clear(); num_non_null_compiled_methods_ = 0u; @@ -694,7 +740,7 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor { } bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED, - const ClassDataItemIterator& it) { + const ClassDataItemIterator& it) OVERRIDE { // Fill in the compiled_methods_ array for methods that have a // CompiledMethod. We track the number of non-null entries in // num_non_null_compiled_methods_ since we only want to allocate @@ -704,12 +750,12 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor { writer_->compiler_driver_->GetCompiledMethod(MethodReference(dex_file_, method_idx)); compiled_methods_.push_back(compiled_method); if (compiled_method != nullptr) { - ++num_non_null_compiled_methods_; + ++num_non_null_compiled_methods_; } return true; } - bool EndClass() { + bool EndClass() OVERRIDE { ClassReference class_ref(dex_file_, class_def_index_); mirror::Class::Status status; bool found = writer_->compiler_driver_->GetCompiledClass(class_ref, &status); @@ -740,14 +786,14 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor { class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { public: InitCodeMethodVisitor(OatWriter* writer, size_t offset, size_t quickening_info_offset) - : OatDexMethodVisitor(writer, offset), - debuggable_(writer->GetCompilerDriver()->GetCompilerOptions().GetDebuggable()), - current_quickening_info_offset_(quickening_info_offset) { + : OatDexMethodVisitor(writer, offset), + debuggable_(writer->GetCompilerDriver()->GetCompilerOptions().GetDebuggable()), + current_quickening_info_offset_(quickening_info_offset) { writer_->absolute_patch_locations_.reserve( 
writer_->compiler_driver_->GetNonRelativeLinkerPatchCount()); } - bool EndClass() { + bool EndClass() OVERRIDE { OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { offset_ = writer_->relative_patcher_->ReserveSpaceEnd(offset_); @@ -755,7 +801,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { return true; } - bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) + bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -858,14 +904,6 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (!patch.IsPcRelative()) { writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset()); } - if (patch.GetType() == LinkerPatch::Type::kTypeBssEntry) { - TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex()); - writer_->bss_type_entries_.Overwrite(ref, /* placeholder */ 0u); - } - if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { - StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); - writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u); - } } } } @@ -950,11 +988,10 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { public: InitMapMethodVisitor(OatWriter* writer, size_t offset) - : OatDexMethodVisitor(writer, offset) { - } + : OatDexMethodVisitor(writer, offset) {} bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED) - REQUIRES_SHARED(Locks::mutator_lock_) { + OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = 
oat_class->GetCompiledMethod(class_def_method_index); @@ -997,7 +1034,7 @@ class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor { InitMethodInfoVisitor(OatWriter* writer, size_t offset) : OatDexMethodVisitor(writer, offset) {} bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED) - REQUIRES_SHARED(Locks::mutator_lock_) { + OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -1035,18 +1072,17 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { InitImageMethodVisitor(OatWriter* writer, size_t offset, const std::vector<const DexFile*>* dex_files) - : OatDexMethodVisitor(writer, offset), - pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())), - dex_files_(dex_files), - class_linker_(Runtime::Current()->GetClassLinker()) { - } + : OatDexMethodVisitor(writer, offset), + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())), + dex_files_(dex_files), + class_linker_(Runtime::Current()->GetClassLinker()) {} // Handle copied methods here. Copy pointer to quick code from // an origin method to a copied method only if they are // in the same oat file. If the origin and the copied methods are // in different oat files don't touch the copied method. // References to other oat files are not supported yet. - bool StartClass(const DexFile* dex_file, size_t class_def_index) + bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { OatDexMethodVisitor::StartClass(dex_file, class_def_index); // Skip classes that are not in the image. 
@@ -1085,7 +1121,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { return true; } - bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) + bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { // Skip methods that are not in the image. if (!IsImageClass()) { @@ -1131,8 +1167,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { // Should already have been resolved by the compiler, just peek into the dex cache. // It may not be resolved if the class failed to verify, in this case, don't set the // entrypoint. This is not fatal since the dex cache will contain a resolution method. - method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), - class_linker_->GetImagePointerSize()); + method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), pointer_size_); } if (method != nullptr && compiled_method != nullptr && @@ -1171,7 +1206,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { } } - protected: + private: const PointerSize pointer_size_; const std::vector<const DexFile*>* dex_files_; ClassLinker* const class_linker_; @@ -1182,14 +1217,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { public: WriteCodeMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset, size_t relative_offset) SHARED_LOCK_FUNCTION(Locks::mutator_lock_) - : OatDexMethodVisitor(writer, relative_offset), - class_loader_(writer->HasImage() ? writer->image_writer_->GetClassLoader() : nullptr), - out_(out), - file_offset_(file_offset), - soa_(Thread::Current()), - no_thread_suspension_("OatWriter patching"), - class_linker_(Runtime::Current()->GetClassLinker()), - dex_cache_(nullptr) { + : OatDexMethodVisitor(writer, relative_offset), + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())), + class_loader_(writer->HasImage() ? 
writer->image_writer_->GetClassLoader() : nullptr), + out_(out), + file_offset_(file_offset), + soa_(Thread::Current()), + no_thread_suspension_("OatWriter patching"), + class_linker_(Runtime::Current()->GetClassLinker()), + dex_cache_(nullptr) { patched_code_.reserve(16 * KB); if (writer_->HasBootImage()) { // If we're creating the image, the address space must be ready so that we can apply patches. @@ -1200,7 +1236,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { ~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) { } - bool StartClass(const DexFile* dex_file, size_t class_def_index) + bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { OatDexMethodVisitor::StartClass(dex_file, class_def_index); if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) { @@ -1210,7 +1246,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return true; } - bool EndClass() REQUIRES_SHARED(Locks::mutator_lock_) { + bool EndClass() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { bool result = OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { DCHECK(result); // OatDexMethodVisitor::EndClass() never fails. 
@@ -1223,7 +1259,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return result; } - bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) + bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -1275,6 +1311,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { for (const LinkerPatch& patch : compiled_method->GetPatches()) { uint32_t literal_offset = patch.LiteralOffset(); switch (patch.GetType()) { + case LinkerPatch::Type::kMethodBssEntry: { + uint32_t target_offset = + writer_->bss_start_ + writer_->bss_method_entries_.Get(patch.TargetMethod()); + writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, + patch, + offset_ + literal_offset, + target_offset); + break; + } case LinkerPatch::Type::kCallRelative: { // NOTE: Relative calls across oat files are not supported. 
uint32_t target_offset = GetTargetOffset(patch); @@ -1284,14 +1329,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { target_offset); break; } - case LinkerPatch::Type::kDexCacheArray: { - uint32_t target_offset = GetDexCacheOffset(patch); - writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset_ + literal_offset, - target_offset); - break; - } case LinkerPatch::Type::kStringRelative: { uint32_t target_offset = GetTargetObjectOffset(GetTargetString(patch)); writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, @@ -1302,7 +1339,8 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } case LinkerPatch::Type::kStringBssEntry: { StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); - uint32_t target_offset = writer_->bss_string_entries_.Get(ref); + uint32_t target_offset = + writer_->bss_start_ + writer_->bss_string_entries_.Get(ref); writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, patch, offset_ + literal_offset, @@ -1319,7 +1357,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } case LinkerPatch::Type::kTypeBssEntry: { TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex()); - uint32_t target_offset = writer_->bss_type_entries_.Get(ref); + uint32_t target_offset = writer_->bss_start_ + writer_->bss_type_entries_.Get(ref); writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, patch, offset_ + literal_offset, @@ -1368,6 +1406,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } private: + const PointerSize pointer_size_; ObjPtr<mirror::ClassLoader> class_loader_; OutputStream* const out_; const size_t file_offset_; @@ -1388,8 +1427,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { ObjPtr<mirror::DexCache> dex_cache = (dex_file_ == ref.dex_file) ? 
dex_cache_ : class_linker_->FindDexCache( Thread::Current(), *ref.dex_file); - ArtMethod* method = dex_cache->GetResolvedMethod( - ref.dex_method_index, class_linker_->GetImagePointerSize()); + ArtMethod* method = dex_cache->GetResolvedMethod(ref.dex_method_index, pointer_size_); CHECK(method != nullptr); return method; } @@ -1401,9 +1439,8 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (UNLIKELY(target_offset == 0)) { ArtMethod* target = GetTargetMethod(patch); DCHECK(target != nullptr); - PointerSize size = - GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet()); - const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size); + const void* oat_code_offset = + target->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); if (oat_code_offset != 0) { DCHECK(!writer_->HasBootImage()); DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset)); @@ -1447,19 +1484,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return string; } - uint32_t GetDexCacheOffset(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) { - if (writer_->HasBootImage()) { - uintptr_t element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<uintptr_t>( - patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); - size_t oat_index = writer_->image_writer_->GetOatIndexForDexCache(dex_cache_); - uintptr_t oat_data = writer_->image_writer_->GetOatDataBegin(oat_index); - return element - oat_data; - } else { - size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile()); - return start + patch.TargetDexCacheElementOffset(); - } - } - uint32_t GetTargetMethodOffset(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(writer_->HasBootImage()); method = writer_->image_writer_->GetImageMethodAddress(method); @@ -1525,12 +1549,11 @@ class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor { 
OutputStream* out, const size_t file_offset, size_t relative_offset) - : OatDexMethodVisitor(writer, relative_offset), - out_(out), - file_offset_(file_offset) { - } + : OatDexMethodVisitor(writer, relative_offset), + out_(out), + file_offset_(file_offset) {} - bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) { + bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE { OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -1589,11 +1612,11 @@ class OatWriter::WriteMethodInfoVisitor : public OatDexMethodVisitor { OutputStream* out, const size_t file_offset, size_t relative_offset) - : OatDexMethodVisitor(writer, relative_offset), - out_(out), - file_offset_(file_offset) {} + : OatDexMethodVisitor(writer, relative_offset), + out_(out), + file_offset_(file_offset) {} - bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) { + bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE { OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -1698,12 +1721,17 @@ size_t OatWriter::InitOatHeader(InstructionSet instruction_set, return oat_header_->GetHeaderSize(); } -size_t OatWriter::InitOatDexFiles(size_t offset) { - TimingLogger::ScopedTiming split("InitOatDexFiles", timings_); - // Initialize offsets of dex files. +size_t OatWriter::InitClassOffsets(size_t offset) { + // Reserve space for class offsets in OAT and update class_offsets_offset_. for (OatDexFile& oat_dex_file : oat_dex_files_) { - oat_dex_file.offset_ = offset; - offset += oat_dex_file.SizeOf(); + DCHECK_EQ(oat_dex_file.class_offsets_offset_, 0u); + if (!oat_dex_file.class_offsets_.empty()) { + // Class offsets are required to be 4 byte aligned. 
+ offset = RoundUp(offset, 4u); + oat_dex_file.class_offsets_offset_ = offset; + offset += oat_dex_file.GetClassOffsetsRawSize(); + DCHECK_ALIGNED(offset, 4u); + } } return offset; } @@ -1748,6 +1776,50 @@ size_t OatWriter::InitOatMaps(size_t offset) { return offset; } +size_t OatWriter::InitMethodBssMappings(size_t offset) { + size_t number_of_dex_files = 0u; + for (size_t i = 0, size = dex_files_->size(); i != size; ++i) { + const DexFile* dex_file = (*dex_files_)[i]; + auto it = bss_method_entry_references_.find(dex_file); + if (it != bss_method_entry_references_.end()) { + const BitVector& method_indexes = it->second; + ++number_of_dex_files; + // If there are any classes, the class offsets allocation aligns the offset + // and we cannot have method bss mappings without class offsets. + static_assert(alignof(MethodBssMapping) == 4u, "MethodBssMapping alignment check."); + DCHECK_ALIGNED(offset, 4u); + oat_dex_files_[i].method_bss_mapping_offset_ = offset; + + linker::MethodBssMappingEncoder encoder( + GetInstructionSetPointerSize(oat_header_->GetInstructionSet())); + size_t number_of_entries = 0u; + bool first_index = true; + for (uint32_t method_index : method_indexes.Indexes()) { + uint32_t bss_offset = bss_method_entries_.Get(MethodReference(dex_file, method_index)); + if (first_index || !encoder.TryMerge(method_index, bss_offset)) { + encoder.Reset(method_index, bss_offset); + ++number_of_entries; + first_index = false; + } + } + DCHECK_NE(number_of_entries, 0u); + offset += MethodBssMapping::ComputeSize(number_of_entries); + } + } + // Check that all dex files targeted by method bss entries are in `*dex_files_`. + CHECK_EQ(number_of_dex_files, bss_method_entry_references_.size()); + return offset; +} + +size_t OatWriter::InitOatDexFiles(size_t offset) { + // Initialize offsets of oat dex files. 
+ for (OatDexFile& oat_dex_file : oat_dex_files_) { + oat_dex_file.offset_ = offset; + offset += oat_dex_file.SizeOf(); + } + return offset; +} + size_t OatWriter::InitOatCode(size_t offset) { // calculate the offsets within OatHeader to executable code size_t old_offset = offset; @@ -1806,38 +1878,51 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { } void OatWriter::InitBssLayout(InstructionSet instruction_set) { + { + InitBssLayoutMethodVisitor visitor(this); + bool success = VisitDexMethods(&visitor); + DCHECK(success); + } + + DCHECK_EQ(bss_size_, 0u); if (HasBootImage()) { DCHECK(bss_string_entries_.empty()); - if (bss_type_entries_.empty()) { + if (bss_method_entries_.empty() && bss_type_entries_.empty()) { // Nothing to put to the .bss section. return; } } // Allocate space for app dex cache arrays in the .bss section. - bss_start_ = RoundUp(oat_size_, kPageSize); - bss_size_ = 0u; + PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set); if (!HasBootImage()) { - PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set); for (const DexFile* dex_file : *dex_files_) { - dex_cache_arrays_offsets_.Put(dex_file, bss_start_ + bss_size_); DexCacheArraysLayout layout(pointer_size, dex_file); bss_size_ += layout.Size(); } } + bss_methods_offset_ = bss_size_; + + // Prepare offsets for .bss ArtMethod entries. + for (auto& entry : bss_method_entries_) { + DCHECK_EQ(entry.second, 0u); + entry.second = bss_size_; + bss_size_ += static_cast<size_t>(pointer_size); + } + bss_roots_offset_ = bss_size_; // Prepare offsets for .bss Class entries. for (auto& entry : bss_type_entries_) { DCHECK_EQ(entry.second, 0u); - entry.second = bss_start_ + bss_size_; + entry.second = bss_size_; bss_size_ += sizeof(GcRoot<mirror::Class>); } // Prepare offsets for .bss String entries. 
for (auto& entry : bss_string_entries_) { DCHECK_EQ(entry.second, 0u); - entry.second = bss_start_ + bss_size_; + entry.second = bss_size_; bss_size_ += sizeof(GcRoot<mirror::String>); } } @@ -1845,30 +1930,45 @@ void OatWriter::InitBssLayout(InstructionSet instruction_set) { bool OatWriter::WriteRodata(OutputStream* out) { CHECK(write_state_ == WriteState::kWriteRoData); + size_t file_offset = oat_data_offset_; + off_t current_offset = out->Seek(0, kSeekCurrent); + if (current_offset == static_cast<off_t>(-1)) { + PLOG(ERROR) << "Failed to retrieve current position in " << out->GetLocation(); + } + DCHECK_GE(static_cast<size_t>(current_offset), file_offset + oat_header_->GetHeaderSize()); + size_t relative_offset = current_offset - file_offset; + // Wrap out to update checksum with each write. ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get()); out = &checksum_updating_out; - if (!WriteClassOffsets(out)) { - LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation(); + relative_offset = WriteClassOffsets(out, file_offset, relative_offset); + if (relative_offset == 0) { + PLOG(ERROR) << "Failed to write class offsets to " << out->GetLocation(); return false; } - if (!WriteClasses(out)) { - LOG(ERROR) << "Failed to write classes to " << out->GetLocation(); + relative_offset = WriteClasses(out, file_offset, relative_offset); + if (relative_offset == 0) { + PLOG(ERROR) << "Failed to write classes to " << out->GetLocation(); return false; } - off_t tables_end_offset = out->Seek(0, kSeekCurrent); - if (tables_end_offset == static_cast<off_t>(-1)) { - LOG(ERROR) << "Failed to get oat code position in " << out->GetLocation(); + relative_offset = WriteMethodBssMappings(out, file_offset, relative_offset); + if (relative_offset == 0) { + PLOG(ERROR) << "Failed to write method bss mappings to " << out->GetLocation(); return false; } - size_t file_offset = oat_data_offset_; - size_t relative_offset = static_cast<size_t>(tables_end_offset) - 
file_offset; + relative_offset = WriteMaps(out, file_offset, relative_offset); if (relative_offset == 0) { - LOG(ERROR) << "Failed to write oat code to " << out->GetLocation(); + PLOG(ERROR) << "Failed to write oat code to " << out->GetLocation(); + return false; + } + + relative_offset = WriteOatDexFiles(out, file_offset, relative_offset); + if (relative_offset == 0) { + PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation(); return false; } @@ -1891,12 +1991,12 @@ bool OatWriter::WriteRodata(OutputStream* out) { class OatWriter::WriteQuickeningInfoMethodVisitor : public DexMethodVisitor { public: WriteQuickeningInfoMethodVisitor(OatWriter* writer, OutputStream* out, uint32_t offset) - : DexMethodVisitor(writer, offset), - out_(out), - written_bytes_(0u) {} + : DexMethodVisitor(writer, offset), + out_(out), + written_bytes_(0u) {} bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED, - const ClassDataItemIterator& it) { + const ClassDataItemIterator& it) OVERRIDE { if (it.GetMethodCodeItem() == nullptr) { // No CodeItem. Native or abstract method. 
return true; @@ -2092,6 +2192,7 @@ bool OatWriter::WriteCode(OutputStream* out) { DO_STAT(size_oat_dex_file_offset_); DO_STAT(size_oat_dex_file_class_offsets_offset_); DO_STAT(size_oat_dex_file_lookup_table_offset_); + DO_STAT(size_oat_dex_file_method_bss_mapping_offset_); DO_STAT(size_oat_lookup_table_alignment_); DO_STAT(size_oat_lookup_table_); DO_STAT(size_oat_class_offsets_alignment_); @@ -2100,6 +2201,7 @@ bool OatWriter::WriteCode(OutputStream* out) { DO_STAT(size_oat_class_status_); DO_STAT(size_oat_class_method_bitmaps_); DO_STAT(size_oat_class_method_offsets_); + DO_STAT(size_method_bss_mappings_); #undef DO_STAT VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)"; @@ -2172,35 +2274,41 @@ bool OatWriter::WriteHeader(OutputStream* out, return true; } -bool OatWriter::WriteClassOffsets(OutputStream* out) { +size_t OatWriter::WriteClassOffsets(OutputStream* out, size_t file_offset, size_t relative_offset) { for (OatDexFile& oat_dex_file : oat_dex_files_) { if (oat_dex_file.class_offsets_offset_ != 0u) { - uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_; - off_t actual_offset = out->Seek(expected_offset, kSeekSet); - if (static_cast<uint32_t>(actual_offset) != expected_offset) { - PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset - << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation(); - return false; + // Class offsets are required to be 4 byte aligned. 
+ if (UNLIKELY(!IsAligned<4u>(relative_offset))) { + size_t padding_size = RoundUp(relative_offset, 4u) - relative_offset; + if (!WriteUpTo16BytesAlignment(out, padding_size, &size_oat_class_offsets_alignment_)) { + return 0u; + } + relative_offset += padding_size; } + DCHECK_OFFSET(); if (!oat_dex_file.WriteClassOffsets(this, out)) { - return false; + return 0u; } + relative_offset += oat_dex_file.GetClassOffsetsRawSize(); } } - return true; + return relative_offset; } -bool OatWriter::WriteClasses(OutputStream* out) { +size_t OatWriter::WriteClasses(OutputStream* out, size_t file_offset, size_t relative_offset) { for (OatClass& oat_class : oat_classes_) { + // If there are any classes, the class offsets allocation aligns the offset. + DCHECK_ALIGNED(relative_offset, 4u); + DCHECK_OFFSET(); if (!oat_class.Write(this, out, oat_data_offset_)) { - PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation(); - return false; + return 0u; } + relative_offset += oat_class.SizeOf(); } - return true; + return relative_offset; } -size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) { +size_t OatWriter::WriteMaps(OutputStream* out, size_t file_offset, size_t relative_offset) { { size_t vmap_tables_offset = relative_offset; WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset); @@ -2223,7 +2331,87 @@ size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t return relative_offset; } -size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) { +size_t OatWriter::WriteMethodBssMappings(OutputStream* out, + size_t file_offset, + size_t relative_offset) { + TimingLogger::ScopedTiming split("WriteMethodBssMappings", timings_); + + for (size_t i = 0, size = dex_files_->size(); i != size; ++i) { + const DexFile* dex_file = (*dex_files_)[i]; + OatDexFile* oat_dex_file = &oat_dex_files_[i]; + auto it = 
bss_method_entry_references_.find(dex_file); + if (it != bss_method_entry_references_.end()) { + const BitVector& method_indexes = it->second; + // If there are any classes, the class offsets allocation aligns the offset + // and we cannot have method bss mappings without class offsets. + static_assert(alignof(MethodBssMapping) == sizeof(uint32_t), + "MethodBssMapping alignment check."); + DCHECK_ALIGNED(relative_offset, sizeof(uint32_t)); + + linker::MethodBssMappingEncoder encoder( + GetInstructionSetPointerSize(oat_header_->GetInstructionSet())); + // Allocate a sufficiently large MethodBssMapping. + size_t number_of_method_indexes = method_indexes.NumSetBits(); + DCHECK_NE(number_of_method_indexes, 0u); + size_t max_mappings_size = MethodBssMapping::ComputeSize(number_of_method_indexes); + DCHECK_ALIGNED(max_mappings_size, sizeof(uint32_t)); + std::unique_ptr<uint32_t[]> storage(new uint32_t[max_mappings_size / sizeof(uint32_t)]); + MethodBssMapping* mappings = new(storage.get()) MethodBssMapping(number_of_method_indexes); + mappings->ClearPadding(); + // Encode the MethodBssMapping. + auto init_it = mappings->begin(); + bool first_index = true; + for (uint32_t method_index : method_indexes.Indexes()) { + size_t bss_offset = bss_method_entries_.Get(MethodReference(dex_file, method_index)); + if (first_index) { + first_index = false; + encoder.Reset(method_index, bss_offset); + } else if (!encoder.TryMerge(method_index, bss_offset)) { + *init_it = encoder.GetEntry(); + ++init_it; + encoder.Reset(method_index, bss_offset); + } + } + // Store the last entry and shrink the mapping to the actual size. 
+ *init_it = encoder.GetEntry(); + ++init_it; + DCHECK(init_it <= mappings->end()); + mappings->SetSize(std::distance(mappings->begin(), init_it)); + size_t mappings_size = MethodBssMapping::ComputeSize(mappings->size()); + + DCHECK_EQ(relative_offset, oat_dex_file->method_bss_mapping_offset_); + DCHECK_OFFSET(); + if (!out->WriteFully(storage.get(), mappings_size)) { + return 0u; + } + size_method_bss_mappings_ += mappings_size; + relative_offset += mappings_size; + } else { + DCHECK_EQ(0u, oat_dex_file->method_bss_mapping_offset_); + } + } + return relative_offset; +} + +size_t OatWriter::WriteOatDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset) { + TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_); + + for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) { + OatDexFile* oat_dex_file = &oat_dex_files_[i]; + DCHECK_EQ(relative_offset, oat_dex_file->offset_); + DCHECK_OFFSET(); + + // Write OatDexFile. + if (!oat_dex_file->Write(this, out)) { + return 0u; + } + relative_offset += oat_dex_file->SizeOf(); + } + + return relative_offset; +} + +size_t OatWriter::WriteCode(OutputStream* out, size_t file_offset, size_t relative_offset) { if (compiler_driver_->GetCompilerOptions().IsBootImage()) { InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); @@ -2253,7 +2441,7 @@ size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t } size_t OatWriter::WriteCodeDexFiles(OutputStream* out, - const size_t file_offset, + size_t file_offset, size_t relative_offset) { #define VISIT(VisitorType) \ do { \ @@ -2667,50 +2855,6 @@ bool OatWriter::WriteDexFile(OutputStream* out, return true; } -bool OatWriter::WriteOatDexFiles(OutputStream* rodata) { - TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_); - - off_t initial_offset = rodata->Seek(0, kSeekCurrent); - if (initial_offset == static_cast<off_t>(-1)) { - LOG(ERROR) << "Failed to get current position in " << rodata->GetLocation(); - 
return false; - } - - // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader. If there are - // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and - // this Seek() ensures that we reserve the space for OatHeader in .rodata. - DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize()); - uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize(); - off_t actual_offset = rodata->Seek(expected_offset, kSeekSet); - if (static_cast<uint32_t>(actual_offset) != expected_offset) { - PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset - << " Expected: " << expected_offset << " File: " << rodata->GetLocation(); - return false; - } - - for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) { - OatDexFile* oat_dex_file = &oat_dex_files_[i]; - - DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_, - static_cast<size_t>(rodata->Seek(0, kSeekCurrent))); - - // Write OatDexFile. - if (!oat_dex_file->Write(this, rodata)) { - PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation(); - return false; - } - } - - // Seek back to the initial position. - if (rodata->Seek(initial_offset, kSeekSet) != initial_offset) { - PLOG(ERROR) << "Failed to seek to initial position. 
Actual: " << actual_offset - << " Expected: " << initial_offset << " File: " << rodata->GetLocation(); - return false; - } - - return true; -} - bool OatWriter::OpenDexFiles( File* file, bool verify, @@ -2929,14 +3073,18 @@ bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) { } bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { + return WriteUpTo16BytesAlignment(out, aligned_code_delta, &size_code_alignment_); +} + +bool OatWriter::WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat) { static const uint8_t kPadding[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; - DCHECK_LE(aligned_code_delta, sizeof(kPadding)); - if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { + DCHECK_LE(size, sizeof(kPadding)); + if (UNLIKELY(!out->WriteFully(kPadding, size))) { return false; } - size_code_alignment_ += aligned_code_delta; + *stat += size; return true; } @@ -2965,6 +3113,7 @@ OatWriter::OatDexFile::OatDexFile(const char* dex_file_location, dex_file_offset_(0u), class_offsets_offset_(0u), lookup_table_offset_(0u), + method_bss_mapping_offset_(0u), class_offsets_() { } @@ -2974,19 +3123,8 @@ size_t OatWriter::OatDexFile::SizeOf() const { + sizeof(dex_file_location_checksum_) + sizeof(dex_file_offset_) + sizeof(class_offsets_offset_) - + sizeof(lookup_table_offset_); -} - -void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) { - DCHECK_EQ(class_offsets_offset_, 0u); - if (!class_offsets_.empty()) { - // Class offsets are required to be 4 byte aligned. 
- size_t initial_offset = oat_writer->oat_size_; - size_t offset = RoundUp(initial_offset, 4); - oat_writer->size_oat_class_offsets_alignment_ += offset - initial_offset; - class_offsets_offset_ = offset; - oat_writer->oat_size_ = offset + GetClassOffsetsRawSize(); - } + + sizeof(lookup_table_offset_) + + sizeof(method_bss_mapping_offset_); } bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const { @@ -3029,6 +3167,12 @@ bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) cons } oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_); + if (!out->WriteFully(&method_bss_mapping_offset_, sizeof(method_bss_mapping_offset_))) { + PLOG(ERROR) << "Failed to write method bss mapping offset to " << out->GetLocation(); + return false; + } + oat_writer->size_oat_dex_file_method_bss_mapping_offset_ += sizeof(method_bss_mapping_offset_); + return true; } diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 66b70ade2e..9217701bc5 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -60,11 +60,6 @@ namespace verifier { // OatHeader variable length with count of D OatDexFiles // -// OatDexFile[0] one variable sized OatDexFile with offsets to Dex and OatClasses -// OatDexFile[1] -// ... -// OatDexFile[D] -// // TypeLookupTable[0] one descriptor to class def index hash table for each OatDexFile. // TypeLookupTable[1] // ... @@ -80,20 +75,25 @@ namespace verifier { // ... // OatClass[C] // -// GcMap one variable sized blob with GC map. -// GcMap GC maps are deduplicated. +// MethodBssMapping one variable sized MethodBssMapping for each dex file, optional. +// MethodBssMapping // ... -// GcMap +// MethodBssMapping // -// VmapTable one variable sized VmapTable blob (quick compiler only). +// VmapTable one variable sized VmapTable blob (CodeInfo or QuickeningInfo). // VmapTable VmapTables are deduplicated. // ... 
// VmapTable // -// MappingTable one variable sized blob with MappingTable (quick compiler only). -// MappingTable MappingTables are deduplicated. +// MethodInfo one variable sized blob with MethodInfo. +// MethodInfo MethodInfos are deduplicated. +// ... +// MethodInfo +// +// OatDexFile[0] one variable sized OatDexFile with offsets to Dex and OatClasses +// OatDexFile[1] // ... -// MappingTable +// OatDexFile[D] // // padding if necessary so that the following code will be page aligned // @@ -217,6 +217,10 @@ class OatWriter { return bss_size_; } + size_t GetBssMethodsOffset() const { + return bss_methods_offset_; + } + size_t GetBssRootsOffset() const { return bss_roots_offset_; } @@ -251,6 +255,7 @@ class OatWriter { // to actually write it. class DexMethodVisitor; class OatDexMethodVisitor; + class InitBssLayoutMethodVisitor; class InitOatClassesMethodVisitor; class InitCodeMethodVisitor; class InitMapMethodVisitor; @@ -295,26 +300,30 @@ class OatWriter { const InstructionSetFeatures* instruction_set_features, uint32_t num_dex_files, SafeMap<std::string, std::string>* key_value_store); - size_t InitOatDexFiles(size_t offset); + size_t InitClassOffsets(size_t offset); size_t InitOatClasses(size_t offset); size_t InitOatMaps(size_t offset); + size_t InitMethodBssMappings(size_t offset); + size_t InitOatDexFiles(size_t offset); size_t InitOatCode(size_t offset); size_t InitOatCodeDexFiles(size_t offset); void InitBssLayout(InstructionSet instruction_set); - bool WriteClassOffsets(OutputStream* out); - bool WriteClasses(OutputStream* out); - size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset); - size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset); - size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset); + size_t WriteClassOffsets(OutputStream* out, size_t file_offset, size_t relative_offset); + size_t WriteClasses(OutputStream* out, size_t file_offset, 
size_t relative_offset); + size_t WriteMaps(OutputStream* out, size_t file_offset, size_t relative_offset); + size_t WriteMethodBssMappings(OutputStream* out, size_t file_offset, size_t relative_offset); + size_t WriteOatDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset); + size_t WriteCode(OutputStream* out, size_t file_offset, size_t relative_offset); + size_t WriteCodeDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset); bool RecordOatDataOffset(OutputStream* out); bool ReadDexFileHeader(File* oat_file, OatDexFile* oat_dex_file); bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location); - bool WriteOatDexFiles(OutputStream* oat_rodata); bool WriteTypeLookupTables(OutputStream* oat_rodata, const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files); bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); + bool WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat); void SetMultiOatRelativePatcherAdjustment(); void CloseSources(); @@ -368,9 +377,20 @@ class OatWriter { // The size of the required .bss section holding the DexCache data and GC roots. size_t bss_size_; + // The offset of the methods in .bss section. + size_t bss_methods_offset_; + // The offset of the GC roots in .bss section. size_t bss_roots_offset_; + // Map for recording references to ArtMethod entries in .bss. + SafeMap<const DexFile*, BitVector> bss_method_entry_references_; + + // Map for allocating ArtMethod entries in .bss. Indexed by MethodReference for the target + // method in the dex file with the "method reference value comparator" for deduplication. + // The value is the target offset for patching, starting at `bss_start_ + bss_methods_offset_`. + SafeMap<MethodReference, size_t, MethodReferenceValueComparator> bss_method_entries_; + // Map for allocating Class entries in .bss. 
Indexed by TypeReference for the source // type in the dex file with the "type value comparator" for deduplication. The value // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`. @@ -381,10 +401,6 @@ class OatWriter { // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`. SafeMap<StringReference, size_t, StringReferenceValueComparator> bss_string_entries_; - // Offsets of the dex cache arrays for each app dex file. For the - // boot image, this information is provided by the ImageWriter. - SafeMap<const DexFile*, size_t> dex_cache_arrays_offsets_; // DexFiles not owned. - // Offset of the oat data from the start of the mmapped region of the elf file. size_t oat_data_offset_; @@ -434,6 +450,7 @@ class OatWriter { uint32_t size_oat_dex_file_offset_; uint32_t size_oat_dex_file_class_offsets_offset_; uint32_t size_oat_dex_file_lookup_table_offset_; + uint32_t size_oat_dex_file_method_bss_mapping_offset_; uint32_t size_oat_lookup_table_alignment_; uint32_t size_oat_lookup_table_; uint32_t size_oat_class_offsets_alignment_; @@ -442,6 +459,7 @@ class OatWriter { uint32_t size_oat_class_status_; uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; + uint32_t size_method_bss_mappings_; // The helper for processing relative patches is external so that we can patch across oat files. 
linker::MultiOatRelativePatcher* relative_patcher_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c918ee6687..93234f9630 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -41,6 +41,8 @@ #include "code_generator_mips64.h" #endif +#include "base/bit_utils.h" +#include "base/bit_utils_iterator.h" #include "bytecode_utils.h" #include "class_linker.h" #include "compiled_method.h" @@ -337,7 +339,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: locations->AddTemp(visitor->GetMethodLocation()); locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister()); break; @@ -350,6 +352,34 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( } } +void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { + MoveConstant(temp, invoke->GetDexMethodIndex()); + + // The access check is unnecessary but we do not want to introduce + // extra entrypoints for the codegens that do not support some + // invoke type and fall back to the runtime call. + + // Initialize to anything to silent compiler warnings. 
+ QuickEntrypointEnum entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck; + switch (invoke->GetInvokeType()) { + case kStatic: + entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck; + break; + case kDirect: + entrypoint = kQuickInvokeDirectTrampolineWithAccessCheck; + break; + case kSuper: + entrypoint = kQuickInvokeSuperTrampolineWithAccessCheck; + break; + case kVirtual: + case kInterface: + LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); + UNREACHABLE(); + } + + InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), slow_path); +} void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke) { MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetDexMethodIndex()); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index c9ba5c3357..7bf43f7971 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -496,6 +496,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { static void CreateCommonInvokeLocationSummary( HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); + void GenerateInvokeStaticOrDirectRuntimeCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke); @@ -564,9 +566,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { HInvokeStaticOrDirect* invoke) = 0; // Generate a call to a static or direct method. - virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0; + virtual void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; // Generate a call to a virtual method. 
- virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0; + virtual void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 097e4833d0..e4efbef394 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -19,6 +19,8 @@ #include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" +#include "base/bit_utils.h" +#include "base/bit_utils_iterator.h" #include "code_generator_utils.h" #include "common_arm.h" #include "compiled_method.h" @@ -47,7 +49,6 @@ static bool ExpectedPairLayout(Location location) { return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); } -static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = R0; static constexpr Register kCoreAlwaysSpillRegister = R5; @@ -2396,8 +2397,8 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -3554,18 +3555,10 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderARM 
intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); - } return; } HandleInvoke(invoke); - - // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. - if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); - } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { @@ -3589,7 +3582,6 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -3613,7 +3605,6 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -8955,7 +8946,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr return location.AsRegister<Register>(); } -void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorARM::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -8983,38 +8975,23 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { - HArmDexCacheArraysBase* base = - invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); - Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, - temp.AsRegister<Register>()); - int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset(); - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - Register reg = temp.AsRegister<Register>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. 
- uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + Register temp_reg = temp.AsRegister<Register>(); + PcRelativePatchInfo* labels = NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); + __ BindTrackedLabel(&labels->movw_label); + __ movw(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(temp_reg, temp_reg, ShifterOperand(PC)); + __ LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset */ 0); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. + } } switch (invoke->GetCodePtrLocation()) { @@ -9030,11 +9007,13 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ blx(LR); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorARM::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { Register temp = temp_location.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); @@ -9065,6 +9044,7 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp __ LoadFromOffset(kLoadWord, LR, temp, entry_point); // LR(); __ blx(LR); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch( @@ -9074,6 +9054,13 @@ CodeGeneratorARM::PcRelativePatchInfo* 
CodeGeneratorARM::NewPcRelativeMethodPatc &pc_relative_method_patches_); } +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewMethodBssEntryPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &method_bss_entry_patches_); +} + CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); @@ -9089,11 +9076,6 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatc return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); } -CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, uint32_t element_offset) { - return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); -} - CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); @@ -9152,15 +9134,13 @@ inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches( void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); - 
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); @@ -9174,6 +9154,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { @@ -9310,23 +9292,6 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } } -void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { - Register base_reg = base->GetLocations()->Out().AsRegister<Register>(); - CodeGeneratorARM::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - __ BindTrackedLabel(&labels->movw_label); - __ movw(base_reg, /* placeholder */ 0u); - __ BindTrackedLabel(&labels->movt_label); - __ movt(base_reg, /* placeholder */ 0u); - __ BindTrackedLabel(&labels->add_pc_label); - __ add(base_reg, base_reg, ShifterOperand(PC)); -} - void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { DCHECK_EQ(type, Primitive::kPrimVoid); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 5f37d3bff1..9280e6377c 100644 --- 
a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -455,8 +455,10 @@ class CodeGeneratorARM : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -481,12 +483,11 @@ class CodeGeneratorARM : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); - PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset); // Add a new baker read barrier patch and return the label to be bound // before the BNE instruction. @@ -667,10 +668,10 @@ class CodeGeneratorARM : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative patch info for each HArmDexCacheArraysBase. - ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. 
+ ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d8e709c7a9..34397e66bc 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -19,6 +19,8 @@ #include "arch/arm64/asm_support_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" #include "art_method.h" +#include "base/bit_utils.h" +#include "base/bit_utils_iterator.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -78,7 +80,6 @@ using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; using helpers::XRegisterFrom; -static constexpr int kCurrentMethodStackOffset = 0; // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. 
@@ -1449,8 +1450,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -4497,7 +4498,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic return desired_dispatch_info; } -void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorARM64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { @@ -4526,46 +4528,33 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { // Add ADRP with its PC-relative DexCache access patch. 
- const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache(); - uint32_t element_offset = invoke->GetDexCacheArrayOffset(); - vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); + MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add LDR with its PC-relative DexCache access patch. vixl::aarch64::Label* ldr_label = - NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); + NewMethodBssEntryPatch(target_method, adrp_label); EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register reg = XRegisterFrom(temp); - Register method_reg; - if (current_method.IsRegister()) { - method_reg = XRegisterFrom(current_method); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); - } - - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ Ldr(reg.X(), - MemOperand(method_reg.X(), - ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. 
} } switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Bl(&frame_entry_label_); + { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. + ExactAssemblyScope eas(GetVIXLAssembler(), + kInstructionSize, + CodeBufferCheckScope::kExactSize); + __ bl(&frame_entry_label_); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + } break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_; @@ -4573,14 +4562,13 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok XRegisterFrom(callee_method), ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value())); { - // To ensure that the pc position is recorded immediately after the `blr` instruction - // BLR must be the last instruction emitted in this function. - // Recording the pc will occur right after returning from this function. + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); // lr() __ blr(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } break; } @@ -4588,7 +4576,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok DCHECK(!IsLeafMethod()); } -void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorARM64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. 
In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -4622,12 +4611,11 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te // lr = temp->GetEntryPoint(); __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); { - // To ensure that the pc position is recorded immediately after the `blr` instruction - // BLR should be the last instruction emitted in this function. - // Recording the pc will occur right after returning from this function. + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); // lr(); __ blr(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } } @@ -4648,6 +4636,15 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( &pc_relative_method_patches_); } +vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( + MethodReference target_method, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + adrp_label, + &method_bss_entry_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, @@ -4670,13 +4667,6 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, - uint32_t element_offset, - vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_); -} - vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { baker_read_barrier_patches_.emplace_back(custom_data); return &baker_read_barrier_patches_.back().label; @@ -4698,7 +4688,7 @@ vixl::aarch64::Label* 
CodeGeneratorARM64::NewPcRelativePatch( vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( uint64_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); } vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( @@ -4761,19 +4751,13 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + pc_relative_method_patches_.size() + + method_bss_entry_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); - } if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); @@ -4787,6 +4771,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { @@ -4796,9 +4782,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc DCHECK_EQ(size, linker_patches->size()); } 
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value, - Uint32ToLiteralMap* map) { - return map->GetOrCreate( +vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { + return uint32_literals_.GetOrCreate( value, [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); } @@ -4824,7 +4809,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4837,7 +4821,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 747fc9f0b1..d9c49d19bb 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -540,8 +540,10 @@ class CodeGeneratorARM64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* 
slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { @@ -555,6 +557,13 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); + // Add a new .bss entry method patch for an instruction and return + // the label to be bound before the instruction. The instruction will be + // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` + // pointing to the associated ADRP patch label). + vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method, + vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing @@ -579,15 +588,6 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::StringIndex string_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative dex cache array patch for an instruction and return - // the label to be bound before the instruction. The instruction will be - // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` - // pointing to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, - uint32_t element_offset, - vixl::aarch64::Label* adrp_label = nullptr); - // Add a new baker read barrier patch and return the label to be bound // before the CBNZ instruction. 
vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); @@ -739,8 +739,7 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>*, TypeReferenceValueComparator>; - vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, - Uint32ToLiteralMap* map); + vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays @@ -791,10 +790,10 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // PC-relative DexCache access info. - ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 4d5f88e14a..c6bd871bc5 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -19,6 +19,8 @@ #include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" +#include "base/bit_utils.h" +#include "base/bit_utils_iterator.h" #include "code_generator_utils.h" #include "common_arm.h" #include "compiled_method.h" @@ -76,7 +78,6 @@ static bool ExpectedPairLayout(Location location) { // Use a local definition to prevent copying mistakes. 
static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize); static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; -static constexpr int kCurrentMethodStackOffset = 0; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle @@ -2500,8 +2501,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -3643,18 +3644,10 @@ void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* i IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); - } return; } HandleInvoke(invoke); - - // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 
- if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); - } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) { @@ -3678,7 +3671,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { @@ -3701,7 +3693,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); DCHECK(!codegen_->IsLeafMethod()); } @@ -9120,7 +9111,7 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( } void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp) { + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -9143,44 +9134,30 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { - HArmDexCacheArraysBase* base = - invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); - vixl32::Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, RegisterFrom(temp)); - int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset(); - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset); + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* labels = NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - vixl32::Register method_reg; - vixl32::Register reg = RegisterFrom(temp); - if (current_method.IsRegister()) { - method_reg = RegisterFrom(current_method); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - GetAssembler()->LoadFromOffset( - kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. 
- uint32_t index_in_cache = invoke->GetDexMethodIndex(); - GetAssembler()->LoadFromOffset( - kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Bl(GetFrameEntryLabel()); + { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. + ExactAssemblyScope aas(GetVIXLAssembler(), + vixl32::k32BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ bl(GetFrameEntryLabel()); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + } break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_ @@ -9190,12 +9167,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( RegisterFrom(callee_method), ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. 
ExactAssemblyScope aas(GetVIXLAssembler(), vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); // LR() __ blx(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } break; } @@ -9203,7 +9182,8 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( DCHECK(!IsLeafMethod()); } -void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorARMVIXL::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { vixl32::Register temp = RegisterFrom(temp_location); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); @@ -9239,15 +9219,16 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point); - // LR(); - // This `blx` *must* be the *last* instruction generated by this stub, so that calls to - // `RecordPcInfo()` immediately following record the correct pc. Use a scope to help guarantee - // that. - // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - __ blx(lr); + { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. 
+ ExactAssemblyScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + // LR(); + __ blx(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + } } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( @@ -9257,6 +9238,13 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMe &pc_relative_method_patches_); } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &method_bss_entry_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); @@ -9272,11 +9260,6 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeSt return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, uint32_t element_offset) { - return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); -} - CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); @@ -9340,15 +9323,13 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * 
method_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); @@ -9362,6 +9343,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { @@ -9511,17 +9494,6 @@ void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_in } } } -void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { - vixl32::Register base_reg = OutputRegister(base); - CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - codegen_->EmitMovwMovtPlaceholder(labels, base_reg); -} // Copy the result of a call into the given target. 
void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) { diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index f6e4de33a8..805a3f4366 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -538,8 +538,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -564,12 +566,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); - PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset); // Add a new baker read barrier patch and return the label to be bound // before the BNE instruction. @@ -764,10 +765,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative patch info for each HArmDexCacheArraysBase. 
- ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 951d75a708..b39d412ac2 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -40,10 +40,6 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; -// We'll maximize the range of a single load instruction for dex cache array accesses -// by aligning offset -32768 with the offset of the first used element. 
-static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; - Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -1060,8 +1056,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1602,14 +1598,12 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + pc_relative_method_patches_.size() + + method_bss_entry_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + pc_relative_string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); @@ -1623,6 +1617,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); 
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -1635,6 +1631,13 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPa &pc_relative_method_patches_); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &method_bss_entry_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); @@ -1650,11 +1653,6 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPa return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, uint32_t element_offset) { - return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); -} - CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); @@ -7000,7 +6998,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. - // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods // with irreducible loops. 
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool is_r6 = GetInstructionSetFeatures().IsR6(); @@ -7030,6 +7028,8 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods + // with irreducible loops. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; @@ -7093,25 +7093,28 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods + // with irreducible loops. 
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; switch (dispatch_info.method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: break; default: fallback_load = false; break; } if (fallback_load) { - dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; dispatch_info.method_load_data = 0; } return dispatch_info; } -void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorMIPS::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); @@ -7148,51 +7151,20 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - if (is_r6) { - uint32_t offset = invoke->GetDexCacheArrayOffset(); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset); - bool reordering = __ SetReorder(false); - EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO); - __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - } else { - HMipsDexCacheArraysBase* base = - invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase(); - int32_t offset = - invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); - } - break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register reg = temp.AsRegister<Register>(); - Register method_reg; - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - // TODO: use the appropriate DCHECK() here if possible. - // DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Lw(reg, SP, kCurrentMethodStackOffset); - } - - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. 
- uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadWord, - reg, - reg, - CodeGenerator::GetCachePointerOffset(index_in_cache)); + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info = NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); + Register temp_reg = temp.AsRegister<Register>(); + bool reordering = __ SetReorder(false); + EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678); + __ SetReorder(reordering); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. + } } switch (code_ptr_location) { @@ -7211,6 +7183,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ NopIfNoReordering(); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + DCHECK(!IsLeafMethod()); } @@ -7228,10 +7202,10 @@ void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDire locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorMIPS::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. 
In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -7263,6 +7237,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem // T9(); __ Jalr(T9); __ NopIfNoReordering(); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -7272,7 +7247,6 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { @@ -8732,29 +8706,11 @@ void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( __ Nal(); // Grab the return address off RA. __ Move(reg, RA); - // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()? // Remember this offset (the obtained PC value) for later use with constant area. __ BindPcRelBaseLabel(); } -void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { - Register reg = base->GetLocations()->Out().AsRegister<Register>(); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); - bool reordering = __ SetReorder(false); - // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL. 
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO); - __ Addiu(reg, reg, /* placeholder */ 0x5678); - __ SetReorder(reordering); -} - void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { // The trampoline uses the same calling convention as dex calling conventions, // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 736b5070d9..e72e838dd9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -552,8 +552,10 @@ class CodeGeneratorMIPS : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { @@ -583,12 +585,11 @@ class CodeGeneratorMIPS : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); - PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset); Literal* DeduplicateBootImageAddressLiteral(uint32_t 
address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base); @@ -643,10 +644,10 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative patch info for each HMipsDexCacheArraysBase. - ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 6026814f04..e4f1cbd600 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -957,8 +957,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1440,14 +1440,12 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { 
DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + pc_relative_method_patches_.size() + + method_bss_entry_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + pc_relative_string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); @@ -1461,6 +1459,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -1473,6 +1473,13 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMeth &pc_relative_method_patches_); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &method_bss_entry_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); @@ -1488,11 +1495,6 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStri return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, uint32_t element_offset) { - return 
NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); -} - CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); @@ -4915,7 +4917,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStati return desired_dispatch_info; } -void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); @@ -4948,41 +4951,16 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kLoadDoubleword, DeduplicateUint64Literal(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { - uint32_t offset = invoke->GetDexCacheArrayOffset(); - CodeGeneratorMIPS64::PcRelativePatchInfo* info = - NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset); + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info = NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); EmitPcRelativeAddressPlaceholderHigh(info, AT); __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - GpuRegister reg = temp.AsRegister<GpuRegister>(); - GpuRegister method_reg; - if (current_method.IsRegister()) { - method_reg = 
current_method.AsRegister<GpuRegister>(); - } else { - // TODO: use the appropriate DCHECK() here if possible. - // DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Ld(reg, SP, kCurrentMethodStackOffset); - } - - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadDoubleword, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadDoubleword, - reg, - reg, - CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } @@ -5002,6 +4980,8 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Nop(); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + DCHECK(!IsLeafMethod()); } @@ -5019,10 +4999,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorMIPS64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. 
In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -5054,6 +5034,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // T9(); __ Jalr(T9); __ Nop(); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -5063,7 +5044,6 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 9c6b6f62cb..6260c73614 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -521,8 +521,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { @@ -549,12 +551,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* 
NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); - PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, uint32_t method_index); Literal* DeduplicateBootImageAddressLiteral(uint64_t address); @@ -607,10 +608,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; - // PC-relative patch info. - ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. 
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index a41adca02c..f422b9fc8b 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -22,6 +22,8 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) namespace art { namespace arm64 { +using helpers::ARM64EncodableConstantOrRegister; +using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; using helpers::VRegisterFrom; using helpers::HeapOperand; @@ -34,6 +36,7 @@ using helpers::WRegisterFrom; void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + HInstruction* input = instruction->InputAt(0); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -41,13 +44,19 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction)); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + if (input->IsConstant() && + Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { + locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -57,33 +66,58 @@ void 
LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location src_loc = locations->InAt(0); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); + if (src_loc.IsConstant()) { + __ Movi(dst.V16B(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); + } break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); + if (src_loc.IsConstant()) { + __ Movi(dst.V8H(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); + } break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); + if (src_loc.IsConstant()) { + __ Movi(dst.V4S(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); + } break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0))); + if (src_loc.IsConstant()) { + __ Movi(dst.V2D(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V2D(), XRegisterFrom(src_loc)); + } break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0); + if (src_loc.IsConstant()) { + __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue()); + } else { + __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0); + } break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Dup(dst.V2D(), 
VRegisterFrom(locations->InAt(0)).V2D(), 0); + if (src_loc.IsConstant()) { + __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue()); + } else { + __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0); + } break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index b8465cd9d5..83a261d334 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1032,8 +1032,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), isa_features_(isa_features), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2204,7 +2204,6 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -2228,7 +2227,6 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -4530,7 +4528,8 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr return location.AsRegister<Register>(); } -void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorX86::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -4554,38 +4553,19 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); // Bind a new fixup label at the end of the "movl" insn. 
- uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch( + __ Bind(NewMethodBssEntryPatch( invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), - invoke->GetDexFileForPcRelativeDexCache(), - offset)); + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - Register reg = temp.AsRegister<Register>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ movl(reg, Address(method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. 
} } @@ -4600,11 +4580,13 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, kX86PointerSize).Int32Value())); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorX86::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { Register temp = temp_in.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); @@ -4632,6 +4614,7 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp // call temp->GetEntryPoint(); __ call(Address( temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -4644,6 +4627,16 @@ void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { __ Bind(&boot_image_method_patches_.back().label); } +Label* CodeGeneratorX86::NewMethodBssEntryPatch( + HX86ComputeBaseMethodAddress* method_address, + MethodReference target_method) { + // Add the patch entry and bind its label at the end of the instruction. 
+ method_bss_entry_patches_.emplace_back(method_address, + *target_method.dex_file, + target_method.dex_method_index); + return &method_bss_entry_patches_.back().label; +} + void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); boot_image_type_patches_.emplace_back(address, @@ -4678,15 +4671,6 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { return &string_patches_.back().label; } -Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch( - HX86ComputeBaseMethodAddress* method_address, - const DexFile& dex_file, - uint32_t element_offset) { - // Add the patch entry and bind its label at the end of the instruction. - pc_relative_dex_cache_patches_.emplace_back(method_address, dex_file, element_offset); - return &pc_relative_dex_cache_patches_.back().label; -} - // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -4705,14 +4689,12 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, linker_patches); @@ -4724,6 +4706,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(boot_image_type_patches_.empty()); EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 8130bd9d25..f48753b614 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -408,18 +408,19 @@ class CodeGeneratorX86 : public CodeGenerator { HInvokeStaticOrDirect* invoke) OVERRIDE; // Generate a call to a static or direct method. - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate a call to a virtual method. 
- void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); + Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address, + MethodReference target_method); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); - Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address, - const DexFile& dex_file, - uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index, Handle<mirror::String> handle); @@ -631,10 +632,10 @@ class CodeGeneratorX86 : public CodeGenerator { X86Assembler assembler_; const X86InstructionSetFeatures& isa_features_; - // PC-relative DexCache access info. - ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 8dde298267..7331a9e98e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -977,8 +977,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati return desired_dispatch_info; } -void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86_64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. @@ -1002,35 +1002,17 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); // Bind a new fixup label at the end of the "movl" insn. 
- uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset)); + __ Bind(NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - CpuRegister reg = temp.AsRegister<CpuRegister>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg.AsRegister(); - __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ movq(reg, - Address(CpuRegister(method_reg), - ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. 
} } @@ -1045,11 +1027,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kX86_64PointerSize).SizeValue())); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorX86_64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { CpuRegister temp = temp_in.AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); @@ -1078,6 +1062,7 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // call temp->GetEntryPoint(); __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64PointerSize).SizeValue())); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -1086,6 +1071,12 @@ void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { __ Bind(&boot_image_method_patches_.back().label); } +Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) { + // Add a patch entry and return the label. + method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.dex_method_index); + return &method_bss_entry_patches_.back().label; +} + void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { boot_image_type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_); @@ -1109,13 +1100,6 @@ Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { return &string_patches_.back().label; } -Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset) { - // Add a patch entry and return the label. 
- pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset); - return &pc_relative_dex_cache_patches_.back().label; -} - // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -1134,14 +1118,12 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, linker_patches); @@ -1153,6 +1135,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(boot_image_type_patches_.empty()); EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -1241,8 +1225,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, assembler_(graph->GetArena()), isa_features_(isa_features), constant_area_start_(0), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + 
method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2387,7 +2371,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { @@ -2411,7 +2394,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 25479814d0..33c64290d4 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -404,15 +404,17 @@ class CodeGeneratorX86_64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); + Label* NewMethodBssEntryPatch(MethodReference target_method); 
void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); - Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index, Handle<mirror::String> handle); @@ -601,10 +603,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // PC-relative DexCache access info. - ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 721f74eeee..e73fd7ddc8 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -234,9 +234,20 @@ inline vixl::aarch64::Operand OperandFromMemOperand( } } -inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { - DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant()) - << constant->DebugName(); +inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + + // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL. 
+ if (instr->IsVecReplicateScalar()) { + if (constant->IsLongConstant()) { + return false; + } else if (constant->IsFloatConstant()) { + return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue()); + } else if (constant->IsDoubleConstant()) { + return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue()); + } + return IsUint<8>(value); + } // For single uses we let VIXL handle the constant generation since it will // use registers that are not managed by the register allocator (wip0, wip1). @@ -249,8 +260,6 @@ inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst return true; } - int64_t value = CodeGenerator::GetInt64ValueOf(constant); - if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) { // Uses logical operations. return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize); @@ -276,7 +285,7 @@ inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() - && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { + && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant->AsConstant()); } diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc deleted file mode 100644 index 0c832a5c35..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dex_cache_array_fixups_arm.h" - -#include "base/arena_containers.h" -#ifdef ART_USE_OLD_ARM_BACKEND -#include "code_generator_arm.h" -#include "intrinsics_arm.h" -#else -#include "code_generator_arm_vixl.h" -#include "intrinsics_arm_vixl.h" -#endif -#include "utils/dex_cache_arrays_layout-inl.h" - -namespace art { -namespace arm { -#ifdef ART_USE_OLD_ARM_BACKEND -typedef CodeGeneratorARM CodeGeneratorARMType; -typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType; -#else -typedef CodeGeneratorARMVIXL CodeGeneratorARMType; -typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType; -#endif - -/** - * Finds instructions that need the dex cache arrays base as an input. - */ -class DexCacheArrayFixupsVisitor : public HGraphVisitor { - public: - DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) - : HGraphVisitor(graph), - codegen_(down_cast<CodeGeneratorARMType*>(codegen)), - dex_cache_array_bases_(std::less<const DexFile*>(), - // Attribute memory use to code generator. - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} - - void MoveBasesIfNeeded() { - for (const auto& entry : dex_cache_array_bases_) { - // Bring the base closer to the first use (previously, it was in the - // entry block) and relieve some pressure on the register allocator - // while avoiding recalculation of the base in a loop. 
- HArmDexCacheArraysBase* base = entry.second; - base->MoveBeforeFirstUserAndOutOfLoops(); - } - } - - private: - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - // If this is an invoke with PC-relative access to the dex cache methods array, - // we need to add the dex cache arrays base as the special input. - if (invoke->HasPcRelativeDexCache() && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARMType>(invoke, codegen_)) { - HArmDexCacheArraysBase* base = - GetOrCreateDexCacheArrayBase(invoke, invoke->GetDexFileForPcRelativeDexCache()); - // Update the element offset in base. - DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFileForPcRelativeDexCache()); - base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex())); - // Add the special argument base to the method. - DCHECK(!invoke->HasCurrentMethodInput()); - invoke->AddSpecialInput(base); - } - } - - HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(HInstruction* cursor, - const DexFile& dex_file) { - if (GetGraph()->HasIrreducibleLoops()) { - HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); - cursor->GetBlock()->InsertInstructionBefore(base, cursor); - return base; - } else { - // Ensure we only initialize the pointer once for each dex file. - auto lb = dex_cache_array_bases_.lower_bound(&dex_file); - if (lb != dex_cache_array_bases_.end() && - !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) { - return lb->second; - } - - // Insert the base at the start of the entry block, move it to a better - // position later in MoveBaseIfNeeded(). 
- HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); - HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); - entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); - dex_cache_array_bases_.PutBefore(lb, &dex_file, base); - return base; - } - } - - CodeGeneratorARMType* codegen_; - - using DexCacheArraysBaseMap = - ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; - DexCacheArraysBaseMap dex_cache_array_bases_; -}; - -void DexCacheArrayFixups::Run() { - DexCacheArrayFixupsVisitor visitor(graph_, codegen_); - visitor.VisitInsertionOrder(); - visitor.MoveBasesIfNeeded(); -} - -} // namespace arm -} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h deleted file mode 100644 index 9d67a319b9..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_arm.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ -#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ - -#include "nodes.h" -#include "optimization.h" - -namespace art { - -class CodeGenerator; - -namespace arm { - -class DexCacheArrayFixups : public HOptimization { - public: - DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats), - codegen_(codegen) {} - - static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm"; - - void Run() OVERRIDE; - - private: - CodeGenerator* codegen_; -}; - -} // namespace arm -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc deleted file mode 100644 index 7734f9197d..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_mips.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "code_generator_mips.h" -#include "dex_cache_array_fixups_mips.h" - -#include "base/arena_containers.h" -#include "intrinsics_mips.h" -#include "utils/dex_cache_arrays_layout-inl.h" - -namespace art { -namespace mips { - -/** - * Finds instructions that need the dex cache arrays base as an input. 
- */ -class DexCacheArrayFixupsVisitor : public HGraphVisitor { - public: - explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) - : HGraphVisitor(graph), - codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), - dex_cache_array_bases_(std::less<const DexFile*>(), - // Attribute memory use to code generator. - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} - - void MoveBasesIfNeeded() { - for (const auto& entry : dex_cache_array_bases_) { - // Bring the base closer to the first use (previously, it was in the - // entry block) and relieve some pressure on the register allocator - // while avoiding recalculation of the base in a loop. - HMipsDexCacheArraysBase* base = entry.second; - base->MoveBeforeFirstUserAndOutOfLoops(); - } - // Computing the dex cache base for PC-relative accesses will clobber RA with - // the NAL instruction on R2. Take a note of this before generating the method - // entry. - if (!dex_cache_array_bases_.empty()) { - codegen_->ClobberRA(); - } - } - - private: - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - // If this is an invoke with PC-relative access to the dex cache methods array, - // we need to add the dex cache arrays base as the special input. - if (invoke->HasPcRelativeDexCache() && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { - // Initialize base for target method dex file if needed. - HMipsDexCacheArraysBase* base = - GetOrCreateDexCacheArrayBase(invoke->GetDexFileForPcRelativeDexCache()); - // Update the element offset in base. - DexCacheArraysLayout layout(kMipsPointerSize, &invoke->GetDexFileForPcRelativeDexCache()); - base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex())); - // Add the special argument base to the method. 
- DCHECK(!invoke->HasCurrentMethodInput()); - invoke->AddSpecialInput(base); - } - } - - HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { - return dex_cache_array_bases_.GetOrCreate( - &dex_file, - [this, &dex_file]() { - HMipsDexCacheArraysBase* base = - new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file); - HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); - // Insert the base at the start of the entry block, move it to a better - // position later in MoveBaseIfNeeded(). - entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); - return base; - }); - } - - CodeGeneratorMIPS* codegen_; - - using DexCacheArraysBaseMap = - ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; - DexCacheArraysBaseMap dex_cache_array_bases_; -}; - -void DexCacheArrayFixups::Run() { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); - if (mips_codegen->GetInstructionSetFeatures().IsR6()) { - // Do nothing for R6 because it has PC-relative addressing. - return; - } - if (graph_->HasIrreducibleLoops()) { - // Do not run this optimization, as irreducible loops do not work with an instruction - // that can be live-in at the irreducible loop header. - return; - } - DexCacheArrayFixupsVisitor visitor(graph_, codegen_); - visitor.VisitInsertionOrder(); - visitor.MoveBasesIfNeeded(); -} - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h deleted file mode 100644 index 861a199d6c..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_mips.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ - -#include "nodes.h" -#include "optimization.h" - -namespace art { - -class CodeGenerator; - -namespace mips { - -class DexCacheArrayFixups : public HOptimization { - public: - DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats), - codegen_(codegen) {} - - static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips"; - - void Run() OVERRIDE; - - private: - CodeGenerator* codegen_; -}; - -} // namespace mips -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 88473f02e5..84b20f65e3 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -695,8 +695,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveOp(HLoopInform /*fetch*/ nullptr, type_); default: - CHECK(false) << op; - break; + LOG(FATAL) << op; + UNREACHABLE(); } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 9be6a512f5..142c95780e 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -56,7 +56,7 @@ static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3; // Limit the number of dex registers that we accumulate while 
inlining // to avoid creating large amount of nested environments. -static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; +static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 32; // Limit recursive call inlining, which do not benefit from too // much inlining compared to code locality. diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index df9e7164ed..a73b1246d8 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -888,7 +888,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, } HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, + HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, 0u }; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 990a773a95..37d79814be 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -124,12 +124,12 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { // are no pools emitted. vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - LocationFrom(kArtMethodRegister)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); } // Copy the result back to the expected output. 
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 0e04b9a950..3c9b613803 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -97,11 +97,10 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { Location method_loc = MoveArguments(codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index ea3e9e5ec9..4cea6dfdfb 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -112,12 +112,12 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - Location::RegisterLocation(A0)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 2ecb1a3b02..d785567e0f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -101,12 +101,12 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - Location::RegisterLocation(A0)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index c1f9ae6425..8c69d9b643 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -56,11 +56,10 @@ class IntrinsicSlowPath : public SlowPathCode { Location method_loc = MoveArguments(codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index a9da15d2ce..6b4851d541 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -796,7 +796,6 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(EAX)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. Location out = invoke->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 8100645e54..ef98b7be30 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -567,7 +567,6 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invo DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall( invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. Location out = invoke->GetLocations()->Out(); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index cfcafa5c24..9c8a632d40 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -499,6 +499,7 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { body = it.Current(); } } + CHECK(body != nullptr); // Ensure there is only a single exit point. 
if (header->GetSuccessors().size() != 2) { return; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 9a91287670..d0047c54f2 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2623,7 +2623,7 @@ const DexFile& HInvokeStaticOrDirect::GetDexFileForPcRelativeDexCache() const { } bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { - if (GetMethodLoadKind() != MethodLoadKind::kDexCacheViaMethod) { + if (GetMethodLoadKind() != MethodLoadKind::kRuntimeCall) { return false; } if (!IsIntrinsic()) { @@ -2643,10 +2643,10 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: return os << "DirectAddress"; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - return os << "DexCachePcRelative"; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: + return os << "BssEntry"; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index befd0ff97b..2867797e20 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1410,12 +1410,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(IntermediateAddressIndex, Instruction) #endif -#ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) -#else -#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ - M(ArmDexCacheArraysBase, Instruction) -#endif #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) @@ -1424,7 +1419,6 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ M(MipsComputeBaseMethodAddress, Instruction) \ - 
M(MipsDexCacheArraysBase, Instruction) \ M(MipsPackedSwitch, Instruction) #endif @@ -4166,17 +4160,13 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, - // Load from resolved methods array in the dex cache using a PC-relative load. - // Used when we need to use the dex cache, for example for invoke-static that - // may cause class initialization (the entry may point to a resolution method), - // and we know that we can access the dex cache arrays using a PC-relative load. - kDexCachePcRelative, - - // Use ArtMethod* from the resolved methods of the compiled method's own ArtMethod*. - // Used for JIT when we need to use the dex cache. This is also the last-resort-kind - // used when other kinds are unavailable (say, dex cache arrays are not PC-relative) - // or unimplemented or impractical (i.e. slow) on a particular architecture. - kDexCacheViaMethod, + // Load from an entry in the .bss section using a PC-relative load. + // Used for classes outside boot image when .bss is accessible with a PC-relative load. + kBssEntry, + + // Make a runtime call to resolve and call the method. This is the last-resort-kind + // used when other kinds are unimplemented on a particular architecture. + kRuntimeCall, }; // Determines the location of the code pointer. @@ -4197,7 +4187,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // - thread entrypoint offset for kStringInit method if this is a string init invoke. // Note that there are multiple string init methods, each having its own offset. // - the method address for kDirectAddress - // - the dex cache arrays offset for kDexCachePcRel. 
uint64_t method_load_data; }; @@ -4298,12 +4287,9 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } - bool HasPcRelativeDexCache() const { - return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; - } bool HasPcRelativeMethodLoadKind() const { return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || - GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; + GetMethodLoadKind() == MethodLoadKind::kBssEntry; } bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. @@ -4327,11 +4313,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { return dispatch_info_.method_load_data; } - uint32_t GetDexCacheArrayOffset() const { - DCHECK(HasPcRelativeDexCache()); - return dispatch_info_.method_load_data; - } - const DexFile& GetDexFileForPcRelativeDexCache() const; ClinitCheckRequirement GetClinitCheckRequirement() const { @@ -4376,7 +4357,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Does this method load kind need the current method as an input? 
static bool NeedsCurrentMethodInput(MethodLoadKind kind) { - return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod; + return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kRuntimeCall; } DECLARE_INSTRUCTION(InvokeStaticOrDirect); @@ -6881,9 +6862,6 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) #include "nodes_shared.h" #endif -#ifdef ART_ENABLE_CODEGEN_arm -#include "nodes_arm.h" -#endif #ifdef ART_ENABLE_CODEGEN_mips #include "nodes_mips.h" #endif diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h deleted file mode 100644 index d9f9740e73..0000000000 --- a/compiler/optimizing/nodes_arm.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_ -#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_ - -namespace art { - -class HArmDexCacheArraysBase FINAL : public HExpression<0> { - public: - explicit HArmDexCacheArraysBase(const DexFile& dex_file) - : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), - dex_file_(&dex_file), - element_offset_(static_cast<size_t>(-1)) { } - - bool CanBeMoved() const OVERRIDE { return true; } - - void UpdateElementOffset(size_t element_offset) { - // Use the lowest offset from the requested elements so that all offsets from - // this base are non-negative because our assemblers emit negative-offset loads - // as a sequence of two or more instructions. (However, positive offsets beyond - // 4KiB also require two or more instructions, so this simple heuristic could - // be improved for cases where there is a dense cluster of elements far from - // the lowest offset. This is expected to be rare enough though, so we choose - // not to spend compile time on elaborate calculations.) 
- element_offset_ = std::min(element_offset_, element_offset); - } - - const DexFile& GetDexFile() const { - return *dex_file_; - } - - size_t GetElementOffset() const { - return element_offset_; - } - - DECLARE_INSTRUCTION(ArmDexCacheArraysBase); - - private: - const DexFile* dex_file_; - size_t element_offset_; - - DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_NODES_ARM_H_ diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index 36431c1fb9..8e439d9621 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -34,38 +34,6 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); }; -class HMipsDexCacheArraysBase : public HExpression<0> { - public: - explicit HMipsDexCacheArraysBase(const DexFile& dex_file) - : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), - dex_file_(&dex_file), - element_offset_(static_cast<size_t>(-1)) { } - - bool CanBeMoved() const OVERRIDE { return true; } - - void UpdateElementOffset(size_t element_offset) { - // We'll maximize the range of a single load instruction for dex cache array accesses - // by aligning offset -32768 with the offset of the first used element. - element_offset_ = std::min(element_offset_, element_offset); - } - - const DexFile& GetDexFile() const { - return *dex_file_; - } - - size_t GetElementOffset() const { - return element_offset_; - } - - DECLARE_INSTRUCTION(MipsDexCacheArraysBase); - - private: - const DexFile* dex_file_; - size_t element_offset_; - - DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); -}; - // Mips version of HPackedSwitch that holds a pointer to the base method address. 
class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { public: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e5ab00bce3..890ba674b5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -24,16 +24,11 @@ #include "android-base/strings.h" -#ifdef ART_ENABLE_CODEGEN_arm -#include "dex_cache_array_fixups_arm.h" -#endif - #ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif #ifdef ART_ENABLE_CODEGEN_mips -#include "dex_cache_array_fixups_mips.h" #include "pc_relative_fixups_mips.h" #endif @@ -522,8 +517,6 @@ static HOptimization* BuildOptimization( } else if (opt_name == CodeSinking::kCodeSinkingPassName) { return new (arena) CodeSinking(graph, stats); #ifdef ART_ENABLE_CODEGEN_arm - } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { - return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) { return new (arena) arm::InstructionSimplifierArm(graph, stats); #endif @@ -532,8 +525,6 @@ static HOptimization* BuildOptimization( return new (arena) arm64::InstructionSimplifierArm64(graph, stats); #endif #ifdef ART_ENABLE_CODEGEN_mips - } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) { - return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) { return new (arena) mips::PcRelativeFixups(graph, codegen, stats); #endif @@ -641,8 +632,6 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, #if defined(ART_ENABLE_CODEGEN_arm) case kThumb2: case kArm: { - arm::DexCacheArrayFixups* fixups = - new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); arm::InstructionSimplifierArm* simplifier = new (arena) arm::InstructionSimplifierArm(graph, stats); 
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); @@ -653,7 +642,6 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, simplifier, side_effects, gvn, - fixups, scheduling, }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); @@ -682,11 +670,8 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, case kMips: { mips::PcRelativeFixups* pc_relative_fixups = new (arena) mips::PcRelativeFixups(graph, codegen, stats); - mips::DexCacheArrayFixups* dex_cache_array_fixups = - new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); HOptimization* mips_optimizations[] = { pc_relative_fixups, - dex_cache_array_fixups }; RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); break; diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index bce54bf49a..21b645279e 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -59,10 +59,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - // If this is an invoke with PC-relative pointer to a method, + // If this is an invoke with PC-relative load kind, // we need to add the base as the special input. - if (invoke->GetMethodLoadKind() == - HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative && + if (invoke->HasPcRelativeMethodLoadKind() && !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { InitializePCRelativeBasePointer(); // Add the special argument base to the method. 
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 832a7e1571..e78cd78aa2 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -818,10 +818,5 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { } } -void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) { - last_visited_internal_latency_ = kArmIntegerOpLatency; - last_visited_latency_ = kArmIntegerOpLatency; -} - } // namespace arm } // namespace art diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 106b709eda..8bd568befd 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -128,15 +128,8 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the dex cache arrays. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; - // Note: we use the invoke's graph instead of the codegen graph, which are - // different when inlining (the codegen graph is the most outer graph). The - // invoke's dex method index is relative to the dex file where the invoke's graph - // was built from. - DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()), - &invoke->GetBlock()->GetGraph()->GetDexFile()); - method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex()); + // Use PC-relative access to the .bss methods arrays. 
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 0f24e81be2..bb23a29064 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -25,7 +25,7 @@ #include "base/bit_utils.h" #include "base/enums.h" #include "base/logging.h" -#include "base/stl_util.h" +#include "base/stl_util_identity.h" #include "base/value_object.h" #include "constants_arm.h" #include "utils/arm/assembler_arm_shared.h" diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index 6afc3ddecb..eb3f870432 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -18,6 +18,8 @@ #include <type_traits> #include "assembler_arm_vixl.h" +#include "base/bit_utils.h" +#include "base/bit_utils_iterator.h" #include "entrypoints/quick/quick_entrypoints.h" #include "thread.h" diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index a99d02d4d0..0b05b752da 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -404,6 +404,129 @@ uint32_t MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) return encoding; } +uint32_t MipsAssembler::EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wt, kNoVectorRegister); + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + static_cast<uint32_t>(wt) << kWtShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); + return encoding; +} + +uint32_t MipsAssembler::EmitMsaBIT(int 
operation, + int df_m, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df_m << kDfMShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); + return encoding; +} + +uint32_t MipsAssembler::EmitMsaELM(int operation, + int df_n, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaELMOperationShift | + df_n << kDfNShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); + return encoding; +} + +uint32_t MipsAssembler::EmitMsaMI10(int s10, + Register rs, + VectorRegister wd, + int minor_opcode, + int df) { + CHECK_NE(rs, kNoRegister); + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(s10)) << s10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + s10 << kS10Shift | + static_cast<uint32_t>(rs) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode << kS10MinorShift | + df; + Emit(encoding); + return encoding; +} + +uint32_t MipsAssembler::EmitMsaI10(int operation, + int df, + int i10, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(i10)) << i10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + i10 << kI10Shift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); + return encoding; +} + +uint32_t MipsAssembler::EmitMsa2R(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + 
CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2ROperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); + return encoding; +} + +uint32_t MipsAssembler::EmitMsa2RF(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2RFOperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); + return encoding; +} + void MipsAssembler::Addu(Register rd, Register rs, Register rt) { DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt); } @@ -635,9 +758,8 @@ void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) { DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt); } -// TODO: This instruction is available in both R6 and MSA and it should be used when available. void MipsAssembler::Lsa(Register rd, Register rs, Register rt, int saPlusOne) { - CHECK(IsR6()); + CHECK(IsR6() || HasMsa()); CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne; int sa = saPlusOne - 1; DsFsmInstrRrr(EmitR(0x0, rs, rt, rd, sa, 0x05), rd, rs, rt); @@ -653,7 +775,7 @@ void MipsAssembler::ShiftAndAdd(Register dst, if (shamt == TIMES_1) { // Catch the special case where the shift amount is zero (0). 
Addu(dst, src_base, src_idx); - } else if (IsR6()) { + } else if (IsR6() || HasMsa()) { Lsa(dst, src_idx, src_base, shamt); } else { Sll(tmp, src_idx, shamt); @@ -1709,6 +1831,1079 @@ void MipsAssembler::PopAndReturn(Register rd, Register rt) { SetReorder(reordering); } +void MipsAssembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + 
static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, 
wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + 
CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void 
MipsAssembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + 
static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + 
DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_sH(VectorRegister wd, 
VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} 
+ +void MipsAssembler::Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + 
static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x0, 
wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::Ffint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::Ftint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::Ftint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd), + 
static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + 
DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + DsFsmInstrFff(EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + DsFsmInstrFff(EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + DsFsmInstrFff(EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + DsFsmInstrFff(EmitMsaBIT(0x0, 
shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + DsFsmInstrFff(EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + DsFsmInstrFff(EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + DsFsmInstrFff(EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + DsFsmInstrFff(EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + DsFsmInstrFff(EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + DsFsmInstrFff(EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void 
MipsAssembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + DsFsmInstrFff(EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + DsFsmInstrFff(EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::MoveV(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(n4)) << n4; + DsFsmInstrFff(EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(n3)) << n3; + DsFsmInstrFff(EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) { + CHECK(HasMsa()); + CHECK(IsUint<2>(n2)) << n2; + DsFsmInstrFff(EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(ws)); +} + +void MipsAssembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) { + CHECK(HasMsa()); + CHECK(IsUint<1>(n1)) << n1; + DsFsmInstrFff(EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + 
static_cast<FRegister>(ws)); +} + +void MipsAssembler::FillB(VectorRegister wd, Register rs) { + CHECK(HasMsa()); + DsFsmInstrFr(EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::FillH(VectorRegister wd, Register rs) { + CHECK(HasMsa()); + DsFsmInstrFr(EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::FillW(VectorRegister wd, Register rs) { + CHECK(HasMsa()); + DsFsmInstrFr(EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::LdiB(VectorRegister wd, int imm8) { + CHECK(HasMsa()); + CHECK(IsInt<8>(imm8)) << imm8; + DsFsmInstrFr(EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7), + static_cast<FRegister>(wd), + ZERO); +} + +void MipsAssembler::LdiH(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + DsFsmInstrFr(EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7), + static_cast<FRegister>(wd), + ZERO); +} + +void MipsAssembler::LdiW(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + DsFsmInstrFr(EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7), + static_cast<FRegister>(wd), + ZERO); +} + +void MipsAssembler::LdiD(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + DsFsmInstrFr(EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7), + static_cast<FRegister>(wd), + ZERO); +} + +void MipsAssembler::LdB(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<10>(offset)) << offset; + DsFsmInstrFr(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::LdH(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMipsHalfwordSize); + DsFsmInstrFr(EmitMsaMI10((offset >> TIMES_2) & 
kMsaS10Mask, rs, wd, 0x8, 0x1), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::LdW(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMipsWordSize); + DsFsmInstrFr(EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::LdD(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMipsDoublewordSize); + DsFsmInstrFr(EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::StB(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<10>(offset)) << offset; + DsFsmInstrFR(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0), static_cast<FRegister>(wd), rs); +} + +void MipsAssembler::StH(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMipsHalfwordSize); + DsFsmInstrFR(EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::StW(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMipsWordSize); + DsFsmInstrFR(EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::StD(VectorRegister wd, Register rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMipsDoublewordSize); + DsFsmInstrFR(EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3), + static_cast<FRegister>(wd), + rs); +} + +void MipsAssembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), 
+ static_cast<FRegister>(wt)); +} + +void MipsAssembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + +void MipsAssembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14), + static_cast<FRegister>(wd), + static_cast<FRegister>(ws), + static_cast<FRegister>(wt)); +} + void MipsAssembler::LoadConst32(Register rd, int32_t value) { if (IsUint<16>(value)) { // Use OR with (unsigned) immediate to encode 16b unsigned int. diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 463daeb5d7..dd4ce6dc80 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -25,6 +25,7 @@ #include "base/arena_containers.h" #include "base/enums.h" #include "base/macros.h" +#include "base/stl_util_identity.h" #include "constants_mips.h" #include "globals.h" #include "managed_register_mips.h" @@ -36,6 +37,7 @@ namespace art { namespace mips { +static constexpr size_t kMipsHalfwordSize = 2; static constexpr size_t kMipsWordSize = 4; static constexpr size_t kMipsDoublewordSize = 8; @@ -194,6 +196,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), + has_msa_(instruction_set_features != nullptr ? 
instruction_set_features->HasMsa() : false), isa_features_(instruction_set_features) { cfi().DelayEmittingAdvancePCs(); } @@ -464,6 +467,149 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Clear(Register rd); void Not(Register rd, Register rs); + // MSA instructions. + void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uD(VectorRegister wd, VectorRegister ws, 
VectorRegister wt); + void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void 
Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FminW(VectorRegister wd, VectorRegister ws, VectorRegister 
wt); + void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void Ffint_sW(VectorRegister wd, VectorRegister ws); + void Ffint_sD(VectorRegister wd, VectorRegister ws); + void Ftint_sW(VectorRegister wd, VectorRegister ws); + void Ftint_sD(VectorRegister wd, VectorRegister ws); + + void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1). 
+ void SlliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SlliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SlliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SlliD(VectorRegister wd, VectorRegister ws, int shamt6); + void SraiB(VectorRegister wd, VectorRegister ws, int shamt3); + void SraiH(VectorRegister wd, VectorRegister ws, int shamt4); + void SraiW(VectorRegister wd, VectorRegister ws, int shamt5); + void SraiD(VectorRegister wd, VectorRegister ws, int shamt6); + void SrliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SrliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SrliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SrliD(VectorRegister wd, VectorRegister ws, int shamt6); + + void MoveV(VectorRegister wd, VectorRegister ws); + void SplatiB(VectorRegister wd, VectorRegister ws, int n4); + void SplatiH(VectorRegister wd, VectorRegister ws, int n3); + void SplatiW(VectorRegister wd, VectorRegister ws, int n2); + void SplatiD(VectorRegister wd, VectorRegister ws, int n1); + void FillB(VectorRegister wd, Register rs); + void FillH(VectorRegister wd, Register rs); + void FillW(VectorRegister wd, Register rs); + + void LdiB(VectorRegister wd, int imm8); + void LdiH(VectorRegister wd, int imm10); + void LdiW(VectorRegister wd, int imm10); + void LdiD(VectorRegister wd, int imm10); + void LdB(VectorRegister wd, Register rs, int offset); + void LdH(VectorRegister wd, Register rs, int offset); + void LdW(VectorRegister wd, Register rs, int offset); + void LdD(VectorRegister wd, Register rs, int offset); + void StB(VectorRegister wd, Register rs, int offset); + void StH(VectorRegister wd, Register rs, int offset); + void StW(VectorRegister wd, Register rs, int offset); + void StD(VectorRegister wd, Register rs, int offset); + + void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void 
IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + // Higher level composite instructions. void LoadConst32(Register rd, int32_t value); void LoadConst64(Register reg_hi, Register reg_lo, int64_t value); @@ -1282,6 +1428,30 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm); void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16); void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21); + uint32_t EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + uint32_t EmitMsaBIT(int operation, + int df_m, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + uint32_t EmitMsaELM(int operation, + int df_n, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + uint32_t EmitMsaMI10(int s10, Register rs, VectorRegister wd, int minor_opcode, int df); + uint32_t EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode); + uint32_t EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); + uint32_t EmitMsa2RF(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); void Buncond(MipsLabel* label); void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); @@ -1332,6 +1502,10 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // Emits exception block. 
void EmitExceptionPoll(MipsExceptionSlowPath* exception); + bool HasMsa() const { + return has_msa_; + } + bool IsR6() const { if (isa_features_ != nullptr) { return isa_features_->IsR6(); @@ -1386,6 +1560,8 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t last_old_position_; uint32_t last_branch_id_; + const bool has_msa_; + const MipsInstructionSetFeatures* isa_features_; DISALLOW_COPY_AND_ASSIGN(MipsAssembler); diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 30667efa38..d4642607ad 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -34,9 +34,14 @@ struct MIPSCpuRegisterCompare { class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, - uint32_t> { + uint32_t, + mips::VectorRegister> { public: - typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base; + typedef AssemblerTest<mips::MipsAssembler, + mips::Register, + mips::FRegister, + uint32_t, + mips::VectorRegister> Base; AssemblerMIPS32r6Test() : instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) { @@ -61,7 +66,7 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers. // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily). 
- return " -march=mips32r6 -modd-spreg -Wa,--no-warn" + return " -march=mips32r6 -mmsa -modd-spreg -Wa,--no-warn" " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib"; } @@ -182,6 +187,39 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, fp_registers_.push_back(new mips::FRegister(mips::F29)); fp_registers_.push_back(new mips::FRegister(mips::F30)); fp_registers_.push_back(new mips::FRegister(mips::F31)); + + vec_registers_.push_back(new mips::VectorRegister(mips::W0)); + vec_registers_.push_back(new mips::VectorRegister(mips::W1)); + vec_registers_.push_back(new mips::VectorRegister(mips::W2)); + vec_registers_.push_back(new mips::VectorRegister(mips::W3)); + vec_registers_.push_back(new mips::VectorRegister(mips::W4)); + vec_registers_.push_back(new mips::VectorRegister(mips::W5)); + vec_registers_.push_back(new mips::VectorRegister(mips::W6)); + vec_registers_.push_back(new mips::VectorRegister(mips::W7)); + vec_registers_.push_back(new mips::VectorRegister(mips::W8)); + vec_registers_.push_back(new mips::VectorRegister(mips::W9)); + vec_registers_.push_back(new mips::VectorRegister(mips::W10)); + vec_registers_.push_back(new mips::VectorRegister(mips::W11)); + vec_registers_.push_back(new mips::VectorRegister(mips::W12)); + vec_registers_.push_back(new mips::VectorRegister(mips::W13)); + vec_registers_.push_back(new mips::VectorRegister(mips::W14)); + vec_registers_.push_back(new mips::VectorRegister(mips::W15)); + vec_registers_.push_back(new mips::VectorRegister(mips::W16)); + vec_registers_.push_back(new mips::VectorRegister(mips::W17)); + vec_registers_.push_back(new mips::VectorRegister(mips::W18)); + vec_registers_.push_back(new mips::VectorRegister(mips::W19)); + vec_registers_.push_back(new mips::VectorRegister(mips::W20)); + vec_registers_.push_back(new mips::VectorRegister(mips::W21)); + vec_registers_.push_back(new mips::VectorRegister(mips::W22)); + vec_registers_.push_back(new mips::VectorRegister(mips::W23)); + 
vec_registers_.push_back(new mips::VectorRegister(mips::W24)); + vec_registers_.push_back(new mips::VectorRegister(mips::W25)); + vec_registers_.push_back(new mips::VectorRegister(mips::W26)); + vec_registers_.push_back(new mips::VectorRegister(mips::W27)); + vec_registers_.push_back(new mips::VectorRegister(mips::W28)); + vec_registers_.push_back(new mips::VectorRegister(mips::W29)); + vec_registers_.push_back(new mips::VectorRegister(mips::W30)); + vec_registers_.push_back(new mips::VectorRegister(mips::W31)); } } @@ -189,6 +227,7 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, AssemblerTest::TearDown(); STLDeleteElements(&registers_); STLDeleteElements(&fp_registers_); + STLDeleteElements(&vec_registers_); } std::vector<mips::Register*> GetRegisters() OVERRIDE { @@ -199,6 +238,10 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, return fp_registers_; } + std::vector<mips::VectorRegister*> GetVectorRegisters() OVERRIDE { + return vec_registers_; + } + uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { return imm_value; } @@ -250,6 +293,7 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_; std::vector<mips::FRegister*> fp_registers_; + std::vector<mips::VectorRegister*> vec_registers_; std::unique_ptr<const MipsInstructionSetFeatures> instruction_set_features_; }; @@ -328,13 +372,11 @@ TEST_F(AssemblerMIPS32r6Test, Lsa) { } TEST_F(AssemblerMIPS32r6Test, Seleqz) { - DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"), - "seleqz"); + DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"), "seleqz"); } TEST_F(AssemblerMIPS32r6Test, Selnez) { - DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"), - "selnez"); + DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"), "selnez"); } 
TEST_F(AssemblerMIPS32r6Test, ClzR6) { @@ -914,6 +956,566 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { // AssemblerMIPS32r6Test.Bltu // AssemblerMIPS32r6Test.Bgeu +// MSA instructions. + +TEST_F(AssemblerMIPS32r6Test, AndV) { + DriverStr(RepeatVVV(&mips::MipsAssembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v"); +} + +TEST_F(AssemblerMIPS32r6Test, OrV) { + DriverStr(RepeatVVV(&mips::MipsAssembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v"); +} + +TEST_F(AssemblerMIPS32r6Test, NorV) { + DriverStr(RepeatVVV(&mips::MipsAssembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v"); +} + +TEST_F(AssemblerMIPS32r6Test, XorV) { + DriverStr(RepeatVVV(&mips::MipsAssembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v"); +} + +TEST_F(AssemblerMIPS32r6Test, AddvB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"), "addv.b"); +} + +TEST_F(AssemblerMIPS32r6Test, AddvH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"), "addv.h"); +} + +TEST_F(AssemblerMIPS32r6Test, AddvW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"), "addv.w"); +} + +TEST_F(AssemblerMIPS32r6Test, AddvD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"), "addv.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SubvB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"), "subv.b"); +} + +TEST_F(AssemblerMIPS32r6Test, SubvH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"), "subv.h"); +} + +TEST_F(AssemblerMIPS32r6Test, SubvW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"), "subv.w"); +} + +TEST_F(AssemblerMIPS32r6Test, SubvD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"), "subv.d"); +} + +TEST_F(AssemblerMIPS32r6Test, MulvB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::MulvB, 
"mulv.b ${reg1}, ${reg2}, ${reg3}"), "mulv.b"); +} + +TEST_F(AssemblerMIPS32r6Test, MulvH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"), "mulv.h"); +} + +TEST_F(AssemblerMIPS32r6Test, MulvW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"), "mulv.w"); +} + +TEST_F(AssemblerMIPS32r6Test, MulvD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"), "mulv.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_sB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"), + "div_s.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_sH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"), + "div_s.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_sW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"), + "div_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_sD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"), + "div_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_uB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"), + "div_u.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_uH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"), + "div_u.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_uW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"), + "div_u.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Div_uD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"), + "div_u.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_sB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"), + "mod_s.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_sH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"), + "mod_s.h"); +} + 
+TEST_F(AssemblerMIPS32r6Test, Mod_sW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"), + "mod_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_sD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"), + "mod_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_uB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"), + "mod_u.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_uH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"), + "mod_u.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_uW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"), + "mod_u.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Mod_uD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"), + "mod_u.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Add_aB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"), + "add_a.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Add_aH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"), + "add_a.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Add_aW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"), + "add_a.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Add_aD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"), + "add_a.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_sB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"), + "ave_s.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_sH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"), + "ave_s.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_sW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"), + "ave_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_sD) { + 
DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"), + "ave_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_uB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"), + "ave_u.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_uH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"), + "ave_u.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_uW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"), + "ave_u.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Ave_uD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"), + "ave_u.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_sB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"), + "aver_s.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_sH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"), + "aver_s.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_sW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"), + "aver_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_sD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"), + "aver_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_uB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"), + "aver_u.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_uH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"), + "aver_u.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_uW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"), + "aver_u.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Aver_uD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"), + "aver_u.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_sB) { + 
DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sB, "max_s.b ${reg1}, ${reg2}, ${reg3}"), + "max_s.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_sH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sH, "max_s.h ${reg1}, ${reg2}, ${reg3}"), + "max_s.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_sW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sW, "max_s.w ${reg1}, ${reg2}, ${reg3}"), + "max_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_sD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sD, "max_s.d ${reg1}, ${reg2}, ${reg3}"), + "max_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_uB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uB, "max_u.b ${reg1}, ${reg2}, ${reg3}"), + "max_u.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_uH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uH, "max_u.h ${reg1}, ${reg2}, ${reg3}"), + "max_u.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_uW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uW, "max_u.w ${reg1}, ${reg2}, ${reg3}"), + "max_u.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Max_uD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uD, "max_u.d ${reg1}, ${reg2}, ${reg3}"), + "max_u.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_sB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sB, "min_s.b ${reg1}, ${reg2}, ${reg3}"), + "min_s.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_sH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sH, "min_s.h ${reg1}, ${reg2}, ${reg3}"), + "min_s.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_sW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sW, "min_s.w ${reg1}, ${reg2}, ${reg3}"), + "min_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_sD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sD, "min_s.d ${reg1}, ${reg2}, ${reg3}"), + "min_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_uB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uB, "min_u.b ${reg1}, ${reg2}, ${reg3}"), + "min_u.b"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_uH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uH, 
"min_u.h ${reg1}, ${reg2}, ${reg3}"), + "min_u.h"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_uW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uW, "min_u.w ${reg1}, ${reg2}, ${reg3}"), + "min_u.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Min_uD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uD, "min_u.d ${reg1}, ${reg2}, ${reg3}"), + "min_u.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FaddW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), "fadd.w"); +} + +TEST_F(AssemblerMIPS32r6Test, FaddD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"), "fadd.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FsubW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"), "fsub.w"); +} + +TEST_F(AssemblerMIPS32r6Test, FsubD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"), "fsub.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FmulW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"), "fmul.w"); +} + +TEST_F(AssemblerMIPS32r6Test, FmulD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"), "fmul.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FdivW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"), "fdiv.w"); +} + +TEST_F(AssemblerMIPS32r6Test, FdivD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"), "fdiv.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FmaxW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FmaxW, "fmax.w ${reg1}, ${reg2}, ${reg3}"), "fmax.w"); +} + +TEST_F(AssemblerMIPS32r6Test, FmaxD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FmaxD, "fmax.d ${reg1}, ${reg2}, ${reg3}"), "fmax.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FminW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::FminW, "fmin.w ${reg1}, ${reg2}, ${reg3}"), "fmin.w"); +} + +TEST_F(AssemblerMIPS32r6Test, FminD) { + 
DriverStr(RepeatVVV(&mips::MipsAssembler::FminD, "fmin.d ${reg1}, ${reg2}, ${reg3}"), "fmin.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Ffint_sW) { + DriverStr(RepeatVV(&mips::MipsAssembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), "ffint_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Ffint_sD) { + DriverStr(RepeatVV(&mips::MipsAssembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"), "ffint_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, Ftint_sW) { + DriverStr(RepeatVV(&mips::MipsAssembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"), "ftint_s.w"); +} + +TEST_F(AssemblerMIPS32r6Test, Ftint_sD) { + DriverStr(RepeatVV(&mips::MipsAssembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"), "ftint_s.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SllB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b"); +} + +TEST_F(AssemblerMIPS32r6Test, SllH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h"); +} + +TEST_F(AssemblerMIPS32r6Test, SllW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w"); +} + +TEST_F(AssemblerMIPS32r6Test, SllD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SllD, "sll.d ${reg1}, ${reg2}, ${reg3}"), "sll.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SraB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b"); +} + +TEST_F(AssemblerMIPS32r6Test, SraH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h"); +} + +TEST_F(AssemblerMIPS32r6Test, SraW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w"); +} + +TEST_F(AssemblerMIPS32r6Test, SraD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SrlB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b"); +} + +TEST_F(AssemblerMIPS32r6Test, SrlH) { + 
DriverStr(RepeatVVV(&mips::MipsAssembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h"); +} + +TEST_F(AssemblerMIPS32r6Test, SrlW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w"); +} + +TEST_F(AssemblerMIPS32r6Test, SrlD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SlliB) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"), "slli.b"); +} + +TEST_F(AssemblerMIPS32r6Test, SlliH) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"), "slli.h"); +} + +TEST_F(AssemblerMIPS32r6Test, SlliW) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"), "slli.w"); +} + +TEST_F(AssemblerMIPS32r6Test, SlliD) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"), "slli.d"); +} + +TEST_F(AssemblerMIPS32r6Test, MoveV) { + DriverStr(RepeatVV(&mips::MipsAssembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v"); +} + +TEST_F(AssemblerMIPS32r6Test, SplatiB) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"), + "splati.b"); +} + +TEST_F(AssemblerMIPS32r6Test, SplatiH) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"), + "splati.h"); +} + +TEST_F(AssemblerMIPS32r6Test, SplatiW) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"), + "splati.w"); +} + +TEST_F(AssemblerMIPS32r6Test, SplatiD) { + DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"), + "splati.d"); +} + +TEST_F(AssemblerMIPS32r6Test, FillB) { + DriverStr(RepeatVR(&mips::MipsAssembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b"); +} + +TEST_F(AssemblerMIPS32r6Test, FillH) { + DriverStr(RepeatVR(&mips::MipsAssembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h"); +} + 
+TEST_F(AssemblerMIPS32r6Test, FillW) { + DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); +} + +TEST_F(AssemblerMIPS32r6Test, LdiB) { + DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); +} + +TEST_F(AssemblerMIPS32r6Test, LdiH) { + DriverStr(RepeatVIb(&mips::MipsAssembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h"); +} + +TEST_F(AssemblerMIPS32r6Test, LdiW) { + DriverStr(RepeatVIb(&mips::MipsAssembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w"); +} + +TEST_F(AssemblerMIPS32r6Test, LdiD) { + DriverStr(RepeatVIb(&mips::MipsAssembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d"); +} + +TEST_F(AssemblerMIPS32r6Test, LdB) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b"); +} + +TEST_F(AssemblerMIPS32r6Test, LdH) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2), + "ld.h"); +} + +TEST_F(AssemblerMIPS32r6Test, LdW) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4), + "ld.w"); +} + +TEST_F(AssemblerMIPS32r6Test, LdD) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8), + "ld.d"); +} + +TEST_F(AssemblerMIPS32r6Test, StB) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b"); +} + +TEST_F(AssemblerMIPS32r6Test, StH) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2), + "st.h"); +} + +TEST_F(AssemblerMIPS32r6Test, StW) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4), + "st.w"); +} + +TEST_F(AssemblerMIPS32r6Test, StD) { + DriverStr(RepeatVRIb(&mips::MipsAssembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8), + "st.d"); +} + +TEST_F(AssemblerMIPS32r6Test, IlvrB) { + DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"), "ilvr.b"); +} + 
+TEST_F(AssemblerMIPS32r6Test, IlvrH) { + DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrH, "ilvr.h ${reg1}, ${reg2}, ${reg3}"), "ilvr.h"); +} + +TEST_F(AssemblerMIPS32r6Test, IlvrW) { + DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrW, "ilvr.w ${reg1}, ${reg2}, ${reg3}"), "ilvr.w"); +} + +TEST_F(AssemblerMIPS32r6Test, IlvrD) { + DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"), "ilvr.d"); +} + #undef __ } // namespace art diff --git a/compiler/utils/mips/constants_mips.h b/compiler/utils/mips/constants_mips.h index 44ed5cc124..b4dfdbd8d3 100644 --- a/compiler/utils/mips/constants_mips.h +++ b/compiler/utils/mips/constants_mips.h @@ -75,8 +75,37 @@ enum InstructionFields { kFdShift = 6, kFdBits = 5, + kMsaOperationShift = 23, + kMsaELMOperationShift = 22, + kMsa2ROperationShift = 18, + kMsa2RFOperationShift = 17, + kDfShift = 21, + kDfMShift = 16, + kDf2RShift = 16, + kDfNShift = 16, + kWtShift = 16, + kWtBits = 5, + kWsShift = 11, + kWsBits = 5, + kWdShift = 6, + kWdBits = 5, + kS10Shift = 16, + kI10Shift = 11, + kS10MinorShift = 2, + kBranchOffsetMask = 0x0000ffff, kJumpOffsetMask = 0x03ffffff, + + kMsaMajorOpcode = 0x1e, + kMsaDfMByteMask = 0x70, + kMsaDfMHalfwordMask = 0x60, + kMsaDfMWordMask = 0x40, + kMsaDfMDoublewordMask = 0x00, + kMsaDfNByteMask = 0x00, + kMsaDfNHalfwordMask = 0x20, + kMsaDfNWordMask = 0x30, + kMsaDfNDoublewordMask = 0x38, + kMsaS10Mask = 0x3ff, }; enum ScaleFactor { diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index b8b800abe3..24900a7f10 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -1456,6 +1456,86 @@ void Mips64Assembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegist EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10); } +void Mips64Assembler::Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe); +} + +void 
Mips64Assembler::Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe); +} + +void 
Mips64Assembler::Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe); +} + void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); @@ -1496,6 +1576,26 @@ void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b); } +void Mips64Assembler::FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b); +} + void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { CHECK(HasMsa()); EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 9b4064543f..773db9b208 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -25,6 +25,7 @@ #include "base/arena_containers.h" #include "base/enums.h" #include "base/macros.h" +#include "base/stl_util_identity.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" @@ -704,6 +705,22 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Aver_uW(VectorRegister wd, VectorRegister 
ws, VectorRegister wt); void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); @@ -713,6 +730,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt); void 
Ffint_sW(VectorRegister wd, VectorRegister ws); void Ffint_sD(VectorRegister wd, VectorRegister ws); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index fbebe0ce15..bdf9598ee7 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -2998,6 +2998,86 @@ TEST_F(AssemblerMIPS64Test, Aver_uD) { "aver_u.d"); } +TEST_F(AssemblerMIPS64Test, Max_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sB, "max_s.b ${reg1}, ${reg2}, ${reg3}"), + "max_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Max_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sH, "max_s.h ${reg1}, ${reg2}, ${reg3}"), + "max_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Max_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sW, "max_s.w ${reg1}, ${reg2}, ${reg3}"), + "max_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Max_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sD, "max_s.d ${reg1}, ${reg2}, ${reg3}"), + "max_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Max_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uB, "max_u.b ${reg1}, ${reg2}, ${reg3}"), + "max_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Max_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uH, "max_u.h ${reg1}, ${reg2}, ${reg3}"), + "max_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Max_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uW, "max_u.w ${reg1}, ${reg2}, ${reg3}"), + "max_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Max_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uD, "max_u.d ${reg1}, ${reg2}, ${reg3}"), + "max_u.d"); +} + +TEST_F(AssemblerMIPS64Test, Min_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sB, "min_s.b ${reg1}, ${reg2}, ${reg3}"), + "min_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Min_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sH, "min_s.h ${reg1}, ${reg2}, ${reg3}"), + "min_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Min_sW) { + 
DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sW, "min_s.w ${reg1}, ${reg2}, ${reg3}"), + "min_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Min_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sD, "min_s.d ${reg1}, ${reg2}, ${reg3}"), + "min_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Min_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uB, "min_u.b ${reg1}, ${reg2}, ${reg3}"), + "min_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Min_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uH, "min_u.h ${reg1}, ${reg2}, ${reg3}"), + "min_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Min_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uW, "min_u.w ${reg1}, ${reg2}, ${reg3}"), + "min_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Min_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uD, "min_u.d ${reg1}, ${reg2}, ${reg3}"), + "min_u.d"); +} + TEST_F(AssemblerMIPS64Test, FaddW) { DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), "fadd.w"); @@ -3038,6 +3118,26 @@ TEST_F(AssemblerMIPS64Test, FdivD) { "fdiv.d"); } +TEST_F(AssemblerMIPS64Test, FmaxW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmaxW, "fmax.w ${reg1}, ${reg2}, ${reg3}"), + "fmax.w"); +} + +TEST_F(AssemblerMIPS64Test, FmaxD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmaxD, "fmax.d ${reg1}, ${reg2}, ${reg3}"), + "fmax.d"); +} + +TEST_F(AssemblerMIPS64Test, FminW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FminW, "fmin.w ${reg1}, ${reg2}, ${reg3}"), + "fmin.w"); +} + +TEST_F(AssemblerMIPS64Test, FminD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FminD, "fmin.d ${reg1}, ${reg2}, ${reg3}"), + "fmin.d"); +} + TEST_F(AssemblerMIPS64Test, Ffint_sW) { DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), "ffint_s.w"); diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 4f6c915142..621a652f0a 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ 
-20,6 +20,7 @@ #include <numeric> #include <sys/mman.h> +#include "base/bit_utils.h" #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" |